Diffstat (limited to 'hw/virtio')
66 files changed, 3894 insertions, 847 deletions
diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig
index aa63ff7..7648a2d 100644
--- a/hw/virtio/Kconfig
+++ b/hw/virtio/Kconfig
@@ -6,6 +6,10 @@ config VIRTIO_RNG
     default y
     depends on VIRTIO
 
+config VIRTIO_NSM
+    bool
+    depends on LIBCBOR && VIRTIO
+
 config VIRTIO_IOMMU
     bool
     default y
@@ -16,6 +20,7 @@ config VIRTIO_PCI
     default y if PCI_DEVICES
     depends on PCI
     select VIRTIO
+    select VIRTIO_MD_SUPPORTED
 
 config VIRTIO_MMIO
     bool
@@ -24,6 +29,7 @@ config VIRTIO_CCW
     bool
     select VIRTIO
+    select VIRTIO_MD_SUPPORTED
 
 config VIRTIO_BALLOON
     bool
@@ -35,10 +41,17 @@ config VIRTIO_CRYPTO
     default y
     depends on VIRTIO
 
+# not all virtio transports support memory devices; if none does,
+# no need to include the code
+config VIRTIO_MD_SUPPORTED
+    bool
+
 config VIRTIO_MD
     bool
+    depends on VIRTIO_MD_SUPPORTED
     select MEM_DEVICE
 
+# selected by the board if it has the required support code
 config VIRTIO_PMEM_SUPPORTED
     bool
@@ -46,9 +59,11 @@ config VIRTIO_PMEM
     bool
     default y
     depends on VIRTIO
+    depends on VIRTIO_MD_SUPPORTED
     depends on VIRTIO_PMEM_SUPPORTED
     select VIRTIO_MD
 
+# selected by the board if it has the required support code
 config VIRTIO_MEM_SUPPORTED
     bool
@@ -57,6 +72,7 @@ config VIRTIO_MEM
     default y
     depends on VIRTIO
     depends on LINUX
+    depends on VIRTIO_MD_SUPPORTED
     depends on VIRTIO_MEM_SUPPORTED
     select VIRTIO_MD
 
@@ -109,4 +125,4 @@ config VHOST_USER_SND
 config VHOST_USER_SCMI
     bool
     default y
-    depends on VIRTIO && VHOST_USER
+    depends on VIRTIO && VHOST_USER && ARM
diff --git a/hw/virtio/cbor-helpers.c b/hw/virtio/cbor-helpers.c
new file mode 100644
index 0000000..49f55df
--- /dev/null
+++ b/hw/virtio/cbor-helpers.c
@@ -0,0 +1,321 @@
+/*
+ * QEMU CBOR helpers
+ *
+ * Copyright (c) 2024 Dorjoy Chowdhury <dorjoychy111@gmail.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version. See the COPYING file in the
+ * top-level directory.
+ */ + +#include "hw/virtio/cbor-helpers.h" + +bool qemu_cbor_map_add(cbor_item_t *map, cbor_item_t *key, cbor_item_t *value) +{ + bool success = false; + struct cbor_pair pair = (struct cbor_pair) { + .key = cbor_move(key), + .value = cbor_move(value) + }; + + success = cbor_map_add(map, pair); + if (!success) { + cbor_incref(pair.key); + cbor_incref(pair.value); + } + + return success; +} + +bool qemu_cbor_array_push(cbor_item_t *array, cbor_item_t *value) +{ + bool success = false; + + success = cbor_array_push(array, cbor_move(value)); + if (!success) { + cbor_incref(value); + } + + return success; +} + +bool qemu_cbor_add_bool_to_map(cbor_item_t *map, const char *key, bool value) +{ + cbor_item_t *key_cbor = NULL; + cbor_item_t *value_cbor = NULL; + + key_cbor = cbor_build_string(key); + if (!key_cbor) { + goto cleanup; + } + value_cbor = cbor_build_bool(value); + if (!value_cbor) { + goto cleanup; + } + if (!qemu_cbor_map_add(map, key_cbor, value_cbor)) { + goto cleanup; + } + + return true; + + cleanup: + if (key_cbor) { + cbor_decref(&key_cbor); + } + if (value_cbor) { + cbor_decref(&value_cbor); + } + return false; +} + +bool qemu_cbor_add_uint8_to_map(cbor_item_t *map, const char *key, + uint8_t value) +{ + cbor_item_t *key_cbor = NULL; + cbor_item_t *value_cbor = NULL; + + key_cbor = cbor_build_string(key); + if (!key_cbor) { + goto cleanup; + } + value_cbor = cbor_build_uint8(value); + if (!value_cbor) { + goto cleanup; + } + if (!qemu_cbor_map_add(map, key_cbor, value_cbor)) { + goto cleanup; + } + + return true; + + cleanup: + if (key_cbor) { + cbor_decref(&key_cbor); + } + if (value_cbor) { + cbor_decref(&value_cbor); + } + return false; +} + +bool qemu_cbor_add_map_to_map(cbor_item_t *map, const char *key, + size_t nested_map_size, + cbor_item_t **nested_map) +{ + cbor_item_t *key_cbor = NULL; + cbor_item_t *value_cbor = NULL; + + key_cbor = cbor_build_string(key); + if (!key_cbor) { + goto cleanup; + } + value_cbor = cbor_new_definite_map(nested_map_size); + if (!value_cbor) { + goto cleanup; + } + if (!qemu_cbor_map_add(map, key_cbor, value_cbor)) { + goto cleanup; + } + *nested_map = value_cbor; + + return true; + + cleanup: + if (key_cbor) { + cbor_decref(&key_cbor); + } + if (value_cbor) { + cbor_decref(&value_cbor); + } + return false; +} + +bool qemu_cbor_add_bytestring_to_map(cbor_item_t *map, const char *key, + uint8_t *arr, size_t len) +{ + cbor_item_t *key_cbor = NULL; + cbor_item_t *value_cbor = NULL; + + key_cbor = cbor_build_string(key); + if (!key_cbor) { + goto cleanup; + } + value_cbor = cbor_build_bytestring(arr, len); + if (!value_cbor) { + goto cleanup; + } + if (!qemu_cbor_map_add(map, key_cbor, value_cbor)) { + goto cleanup; + } + + return true; + + cleanup: + if (key_cbor) { + cbor_decref(&key_cbor); + } + if (value_cbor) { + cbor_decref(&value_cbor); + } + return false; +} + +bool qemu_cbor_add_null_to_map(cbor_item_t *map, const char *key) +{ + cbor_item_t *key_cbor = NULL; + cbor_item_t *value_cbor = NULL; + + key_cbor = cbor_build_string(key); + if (!key_cbor) { + goto cleanup; + } + value_cbor = cbor_new_null(); + if (!value_cbor) { + goto cleanup; + } + if (!qemu_cbor_map_add(map, key_cbor, value_cbor)) { + goto cleanup; + } + + return true; + + cleanup: + if (key_cbor) { + cbor_decref(&key_cbor); + } + if (value_cbor) { + cbor_decref(&value_cbor); + } + return false; +} + +bool qemu_cbor_add_string_to_map(cbor_item_t *map, const char *key, + const char *value) +{ + cbor_item_t *key_cbor = NULL; + cbor_item_t *value_cbor = NULL; + + key_cbor = 
cbor_build_string(key); + if (!key_cbor) { + goto cleanup; + } + value_cbor = cbor_build_string(value); + if (!value_cbor) { + goto cleanup; + } + if (!qemu_cbor_map_add(map, key_cbor, value_cbor)) { + goto cleanup; + } + + return true; + + cleanup: + if (key_cbor) { + cbor_decref(&key_cbor); + } + if (value_cbor) { + cbor_decref(&value_cbor); + } + return false; +} + +bool qemu_cbor_add_uint8_array_to_map(cbor_item_t *map, const char *key, + uint8_t *arr, size_t len) +{ + cbor_item_t *key_cbor = NULL; + cbor_item_t *value_cbor = NULL; + + key_cbor = cbor_build_string(key); + if (!key_cbor) { + goto cleanup; + } + value_cbor = cbor_new_definite_array(len); + if (!value_cbor) { + goto cleanup; + } + + for (int i = 0; i < len; ++i) { + cbor_item_t *tmp = cbor_build_uint8(arr[i]); + if (!tmp) { + goto cleanup; + } + if (!qemu_cbor_array_push(value_cbor, tmp)) { + cbor_decref(&tmp); + goto cleanup; + } + } + if (!qemu_cbor_map_add(map, key_cbor, value_cbor)) { + goto cleanup; + } + + return true; + + cleanup: + if (key_cbor) { + cbor_decref(&key_cbor); + } + if (value_cbor) { + cbor_decref(&value_cbor); + } + return false; +} + +bool qemu_cbor_add_uint8_key_bytestring_to_map(cbor_item_t *map, uint8_t key, + uint8_t *buf, size_t len) +{ + cbor_item_t *key_cbor = NULL; + cbor_item_t *value_cbor = NULL; + + key_cbor = cbor_build_uint8(key); + if (!key_cbor) { + goto cleanup; + } + value_cbor = cbor_build_bytestring(buf, len); + if (!value_cbor) { + goto cleanup; + } + if (!qemu_cbor_map_add(map, key_cbor, value_cbor)) { + goto cleanup; + } + + return true; + + cleanup: + if (key_cbor) { + cbor_decref(&key_cbor); + } + if (value_cbor) { + cbor_decref(&value_cbor); + } + return false; +} + +bool qemu_cbor_add_uint64_to_map(cbor_item_t *map, const char *key, + uint64_t value) +{ + cbor_item_t *key_cbor = NULL; + cbor_item_t *value_cbor = NULL; + + key_cbor = cbor_build_string(key); + if (!key_cbor) { + goto cleanup; + } + value_cbor = cbor_build_uint64(value); + if (!value_cbor) { + goto cleanup; + } + if (!qemu_cbor_map_add(map, key_cbor, value_cbor)) { + goto cleanup; + } + + return true; + + cleanup: + if (key_cbor) { + cbor_decref(&key_cbor); + } + if (value_cbor) { + cbor_decref(&value_cbor); + } + return false; +} diff --git a/hw/virtio/iothread-vq-mapping.c b/hw/virtio/iothread-vq-mapping.c new file mode 100644 index 0000000..15909eb --- /dev/null +++ b/hw/virtio/iothread-vq-mapping.c @@ -0,0 +1,131 @@ +/* + * IOThread Virtqueue Mapping + * + * Copyright Red Hat, Inc + * + * SPDX-License-Identifier: GPL-2.0-only + */ + +#include "qemu/osdep.h" +#include "system/iothread.h" +#include "hw/virtio/iothread-vq-mapping.h" + +static bool +iothread_vq_mapping_validate(IOThreadVirtQueueMappingList *list, uint16_t + num_queues, Error **errp) +{ + g_autofree unsigned long *vqs = bitmap_new(num_queues); + g_autoptr(GHashTable) iothreads = + g_hash_table_new(g_str_hash, g_str_equal); + + for (IOThreadVirtQueueMappingList *node = list; node; node = node->next) { + const char *name = node->value->iothread; + uint16List *vq; + + if (!iothread_by_id(name)) { + error_setg(errp, "IOThread \"%s\" object does not exist", name); + return false; + } + + if (!g_hash_table_add(iothreads, (gpointer)name)) { + error_setg(errp, + "duplicate IOThread name \"%s\" in iothread-vq-mapping", + name); + return false; + } + + if (node != list) { + if (!!node->value->vqs != !!list->value->vqs) { + error_setg(errp, "either all items in iothread-vq-mapping " + "must have vqs or none of them must have it"); + return false; + } + } 
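[Note on the cbor-helpers.c file added above: every qemu_cbor_add_*_to_map helper follows the same ownership pattern, build key and value items, hand both to the map via cbor_move(), and re-take references on failure so the cleanup path cannot double-free. A minimal caller-side sketch, not part of the patch; encode_example is a hypothetical name, while cbor_new_definite_map, cbor_serialize_alloc and cbor_decref are stock libcbor API:]

#include "hw/virtio/cbor-helpers.h"

/* Hypothetical example: encode { "digest": "SHA384", "locked": true }.
 * Returns the encoded length, or 0 on failure. Caller frees *buf. */
static size_t encode_example(unsigned char **buf)
{
    cbor_item_t *map = cbor_new_definite_map(2); /* two key/value pairs */
    size_t buflen = 0, len = 0;

    if (!map) {
        return 0;
    }
    if (qemu_cbor_add_string_to_map(map, "digest", "SHA384") &&
        qemu_cbor_add_bool_to_map(map, "locked", true)) {
        /* cbor_serialize_alloc() allocates the output buffer */
        len = cbor_serialize_alloc(map, buf, &buflen);
    }
    cbor_decref(&map);
    return len;
}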
+ + for (vq = node->value->vqs; vq; vq = vq->next) { + if (vq->value >= num_queues) { + error_setg(errp, "vq index %u for IOThread \"%s\" must be " + "less than num_queues %u in iothread-vq-mapping", + vq->value, name, num_queues); + return false; + } + + if (test_and_set_bit(vq->value, vqs)) { + error_setg(errp, "cannot assign vq %u to IOThread \"%s\" " + "because it is already assigned", vq->value, name); + return false; + } + } + } + + if (list->value->vqs) { + for (uint16_t i = 0; i < num_queues; i++) { + if (!test_bit(i, vqs)) { + error_setg(errp, + "missing vq %u IOThread assignment in iothread-vq-mapping", + i); + return false; + } + } + } + + return true; +} + +bool iothread_vq_mapping_apply( + IOThreadVirtQueueMappingList *list, + AioContext **vq_aio_context, + uint16_t num_queues, + Error **errp) +{ + IOThreadVirtQueueMappingList *node; + size_t num_iothreads = 0; + size_t cur_iothread = 0; + + if (!iothread_vq_mapping_validate(list, num_queues, errp)) { + return false; + } + + for (node = list; node; node = node->next) { + num_iothreads++; + } + + for (node = list; node; node = node->next) { + IOThread *iothread = iothread_by_id(node->value->iothread); + AioContext *ctx = iothread_get_aio_context(iothread); + + /* Released in virtio_blk_vq_aio_context_cleanup() */ + object_ref(OBJECT(iothread)); + + if (node->value->vqs) { + uint16List *vq; + + /* Explicit vq:IOThread assignment */ + for (vq = node->value->vqs; vq; vq = vq->next) { + assert(vq->value < num_queues); + vq_aio_context[vq->value] = ctx; + } + } else { + /* Round-robin vq:IOThread assignment */ + for (unsigned i = cur_iothread; i < num_queues; + i += num_iothreads) { + vq_aio_context[i] = ctx; + } + } + + cur_iothread++; + } + + return true; +} + +void iothread_vq_mapping_cleanup(IOThreadVirtQueueMappingList *list) +{ + IOThreadVirtQueueMappingList *node; + + for (node = list; node; node = node->next) { + IOThread *iothread = iothread_by_id(node->value->iothread); + object_unref(OBJECT(iothread)); + } +} + diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index 621fc65..164f6fd 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -1,5 +1,6 @@ system_virtio_ss = ss.source_set() system_virtio_ss.add(files('virtio-bus.c')) +system_virtio_ss.add(files('iothread-vq-mapping.c')) system_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('virtio-pci.c')) system_virtio_ss.add(when: 'CONFIG_VIRTIO_MMIO', if_true: files('virtio-mmio.c')) system_virtio_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto.c')) @@ -54,6 +55,7 @@ specific_virtio_ss.add(when: 'CONFIG_VIRTIO_PMEM', if_true: files('virtio-pmem.c specific_virtio_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock.c')) specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_VSOCK', if_true: files('vhost-user-vsock.c')) specific_virtio_ss.add(when: 'CONFIG_VIRTIO_RNG', if_true: files('virtio-rng.c')) +specific_virtio_ss.add(when: 'CONFIG_VIRTIO_NSM', if_true: [files('virtio-nsm.c', 'cbor-helpers.c'), libcbor]) specific_virtio_ss.add(when: 'CONFIG_VIRTIO_MEM', if_true: files('virtio-mem.c')) specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_SCMI', if_true: files('vhost-user-scmi.c')) specific_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_SCMI'], if_true: files('vhost-user-scmi-pci.c')) @@ -70,6 +72,7 @@ virtio_pci_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto-pc virtio_pci_ss.add(when: 'CONFIG_VIRTIO_INPUT_HOST', if_true: files('virtio-input-host-pci.c')) virtio_pci_ss.add(when: 
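[Note on iothread-vq-mapping.c above: when no explicit vqs lists are given, iothread_vq_mapping_apply() assigns queues round-robin, IOThread t takes every num_iothreads-th queue starting at t. A standalone illustration of that policy, plain C, not QEMU code:]

#include <stddef.h>
#include <stdio.h>

/* With 2 IOThreads and 5 queues this prints vqs 0,2,4 for IOThread 0,
 * then vqs 1,3 for IOThread 1: the same spread as the round-robin
 * branch of iothread_vq_mapping_apply() above. */
int main(void)
{
    size_t num_iothreads = 2, num_queues = 5;

    for (size_t t = 0; t < num_iothreads; t++) {
        for (size_t q = t; q < num_queues; q += num_iothreads) {
            printf("vq %zu -> iothread %zu\n", q, t);
        }
    }
    return 0;
}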
'CONFIG_VIRTIO_INPUT', if_true: files('virtio-input-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_RNG', if_true: files('virtio-rng-pci.c')) +virtio_pci_ss.add(when: 'CONFIG_VIRTIO_NSM', if_true: [files('virtio-nsm-pci.c', 'cbor-helpers.c'), libcbor]) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_9P', if_true: files('virtio-9p-pci.c')) virtio_pci_ss.add(when: 'CONFIG_VIRTIO_SCSI', if_true: files('virtio-scsi-pci.c')) @@ -87,7 +90,8 @@ specific_virtio_ss.add_all(when: 'CONFIG_VIRTIO_PCI', if_true: virtio_pci_ss) system_ss.add_all(when: 'CONFIG_VIRTIO', if_true: system_virtio_ss) system_ss.add(when: 'CONFIG_VIRTIO', if_false: files('vhost-stub.c')) system_ss.add(when: 'CONFIG_VIRTIO', if_false: files('virtio-stub.c')) -system_ss.add(when: 'CONFIG_VIRTIO_MD', if_false: files('virtio-md-stubs.c')) +system_ss.add(when: ['CONFIG_VIRTIO_MD', 'CONFIG_VIRTIO_PCI'], + if_false: files('virtio-md-stubs.c')) system_ss.add(files('virtio-hmp-cmds.c')) diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index 3cf84e0..76f0d45 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -108,7 +108,7 @@ virtio_pci_notify_write(uint64_t addr, uint64_t val, unsigned int size) "0x%" PR virtio_pci_notify_write_pio(uint64_t addr, uint64_t val, unsigned int size) "0x%" PRIx64" = 0x%" PRIx64 " (%d)" # hw/virtio/virtio-iommu.c -virtio_iommu_device_reset(void) "reset!" +virtio_iommu_device_reset_exit(void) "reset!" virtio_iommu_system_reset(void) "system reset!" virtio_iommu_get_features(uint64_t features) "device supports features=0x%"PRIx64 virtio_iommu_device_status(uint8_t status) "driver status = %d" @@ -116,6 +116,7 @@ virtio_iommu_get_config(uint64_t page_size_mask, uint64_t start, uint64_t end, u virtio_iommu_set_config(uint8_t bypass) "bypass=0x%x" virtio_iommu_attach(uint32_t domain_id, uint32_t ep_id) "domain=%d endpoint=%d" virtio_iommu_detach(uint32_t domain_id, uint32_t ep_id) "domain=%d endpoint=%d" +virtio_iommu_detach_endpoint_from_domain(uint32_t domain_id, uint32_t ep_id) "domain=%d endpoint=%d" virtio_iommu_map(uint32_t domain_id, uint64_t virt_start, uint64_t virt_end, uint64_t phys_start, uint32_t flags) "domain=%d virt_start=0x%"PRIx64" virt_end=0x%"PRIx64 " phys_start=0x%"PRIx64" flags=%d" virtio_iommu_unmap(uint32_t domain_id, uint64_t virt_start, uint64_t virt_end) "domain=%d virt_start=0x%"PRIx64" virt_end=0x%"PRIx64 virtio_iommu_unmap_done(uint32_t domain_id, uint64_t virt_start, uint64_t virt_end) "domain=%d virt_start=0x%"PRIx64" virt_end=0x%"PRIx64 @@ -131,7 +132,7 @@ virtio_iommu_fill_resv_property(uint32_t devid, uint8_t subtype, uint64_t start, virtio_iommu_notify_map(const char *name, uint64_t virt_start, uint64_t virt_end, uint64_t phys_start, uint32_t flags) "mr=%s virt_start=0x%"PRIx64" virt_end=0x%"PRIx64" phys_start=0x%"PRIx64" flags=%d" virtio_iommu_notify_unmap(const char *name, uint64_t virt_start, uint64_t virt_end) "mr=%s virt_start=0x%"PRIx64" virt_end=0x%"PRIx64 virtio_iommu_remap(const char *name, uint64_t virt_start, uint64_t virt_end, uint64_t phys_start) "mr=%s virt_start=0x%"PRIx64" virt_end=0x%"PRIx64" phys_start=0x%"PRIx64 -virtio_iommu_set_page_size_mask(const char *name, uint64_t old, uint64_t new) "mr=%s old_mask=0x%"PRIx64" new_mask=0x%"PRIx64 +virtio_iommu_update_page_size_mask(const char *name, uint64_t old, uint64_t new) "host iommu device=%s old_mask=0x%"PRIx64" new_mask=0x%"PRIx64 virtio_iommu_notify_flag_add(const char *name) "add notifier to mr %s" 
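[Note on the trace-events changes above: each line in trace-events expands into a generated trace_<name>() helper taking the listed argument types, so renaming an event also renames its call site. The new detach event would be emitted roughly as below; example_detach is a hypothetical wrapper, the real call site lives in hw/virtio/virtio-iommu.c and is not part of this diff:]

#include "trace.h" /* generated from hw/virtio/trace-events */

static void example_detach(uint32_t domain_id, uint32_t ep_id)
{
    /* Compiles to a cheap no-op unless the trace backend enables it. */
    trace_virtio_iommu_detach_endpoint_from_domain(domain_id, ep_id);
}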
virtio_iommu_notify_flag_del(const char *name) "del notifier from mr %s" virtio_iommu_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)" diff --git a/hw/virtio/vdpa-dev-pci.c b/hw/virtio/vdpa-dev-pci.c index 5446e6b..3068112 100644 --- a/hw/virtio/vdpa-dev-pci.c +++ b/hw/virtio/vdpa-dev-pci.c @@ -48,10 +48,6 @@ static void vhost_vdpa_device_pci_instance_init(Object *obj) "bootindex"); } -static Property vhost_vdpa_device_pci_properties[] = { - DEFINE_PROP_END_OF_LIST(), -}; - static int vhost_vdpa_device_pci_post_init(VhostVdpaDevice *v, Error **errp) { VhostVdpaDevicePCI *dev = container_of(v, VhostVdpaDevicePCI, vdev); @@ -74,13 +70,13 @@ vhost_vdpa_device_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) qdev_realize(DEVICE(&dev->vdev), BUS(&vpci_dev->bus), errp); } -static void vhost_vdpa_device_pci_class_init(ObjectClass *klass, void *data) +static void vhost_vdpa_device_pci_class_init(ObjectClass *klass, + const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); set_bit(DEVICE_CATEGORY_MISC, dc->categories); - device_class_set_props(dc, vhost_vdpa_device_pci_properties); k->realize = vhost_vdpa_device_pci_realize; } diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c index 64b96b226c..d1da40a 100644 --- a/hw/virtio/vdpa-dev.c +++ b/hw/virtio/vdpa-dev.c @@ -26,8 +26,8 @@ #include "hw/virtio/virtio.h" #include "hw/virtio/virtio-bus.h" #include "hw/virtio/vdpa-dev.h" -#include "sysemu/sysemu.h" -#include "sysemu/runstate.h" +#include "system/system.h" +#include "system/runstate.h" static void vhost_vdpa_device_dummy_handle_output(VirtIODevice *vdev, VirtQueue *vq) @@ -312,7 +312,7 @@ static void vhost_vdpa_device_stop(VirtIODevice *vdev) vhost_dev_disable_notifiers(&s->dev, vdev); } -static void vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status) +static int vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status) { VhostVdpaDevice *s = VHOST_VDPA_DEVICE(vdev); bool should_start = virtio_device_started(vdev, status); @@ -324,7 +324,7 @@ static void vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status) } if (s->started == should_start) { - return; + return 0; } if (should_start) { @@ -335,12 +335,12 @@ static void vhost_vdpa_device_set_status(VirtIODevice *vdev, uint8_t status) } else { vhost_vdpa_device_stop(vdev); } + return 0; } -static Property vhost_vdpa_device_properties[] = { +static const Property vhost_vdpa_device_properties[] = { DEFINE_PROP_STRING("vhostdev", VhostVdpaDevice, vhostdev), DEFINE_PROP_UINT16("queue-size", VhostVdpaDevice, queue_size, 0), - DEFINE_PROP_END_OF_LIST(), }; static const VMStateDescription vmstate_vhost_vdpa_device = { @@ -354,7 +354,7 @@ static const VMStateDescription vmstate_vhost_vdpa_device = { }, }; -static void vhost_vdpa_device_class_init(ObjectClass *klass, void *data) +static void vhost_vdpa_device_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); diff --git a/hw/virtio/vhost-iova-tree.c b/hw/virtio/vhost-iova-tree.c index 3d03395..fa4147b 100644 --- a/hw/virtio/vhost-iova-tree.c +++ b/hw/virtio/vhost-iova-tree.c @@ -28,12 +28,18 @@ struct VhostIOVATree { /* IOVA address to qemu memory maps. 
*/ IOVATree *iova_taddr_map; + + /* Allocated IOVA addresses */ + IOVATree *iova_map; + + /* GPA->IOVA address memory maps */ + IOVATree *gpa_iova_map; }; /** - * Create a new IOVA tree + * Create a new VhostIOVATree * - * Returns the new IOVA tree + * Returns the new VhostIOVATree. */ VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, hwaddr iova_last) { @@ -44,25 +50,29 @@ VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, hwaddr iova_last) tree->iova_last = iova_last; tree->iova_taddr_map = iova_tree_new(); + tree->iova_map = iova_tree_new(); + tree->gpa_iova_map = gpa_tree_new(); return tree; } /** - * Delete an iova tree + * Delete a VhostIOVATree */ void vhost_iova_tree_delete(VhostIOVATree *iova_tree) { iova_tree_destroy(iova_tree->iova_taddr_map); + iova_tree_destroy(iova_tree->iova_map); + iova_tree_destroy(iova_tree->gpa_iova_map); g_free(iova_tree); } /** * Find the IOVA address stored from a memory address * - * @tree: The iova tree + * @tree: The VhostIOVATree * @map: The map with the memory address * - * Return the stored mapping, or NULL if not found. + * Returns the stored IOVA->HVA mapping, or NULL if not found. */ const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *tree, const DMAMap *map) @@ -71,40 +81,111 @@ const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *tree, } /** - * Allocate a new mapping + * Allocate a new IOVA range and add the mapping to the IOVA->HVA tree * - * @tree: The iova tree - * @map: The iova map + * @tree: The VhostIOVATree + * @map: The IOVA mapping + * @taddr: The translated address (HVA) * * Returns: * - IOVA_OK if the map fits in the container * - IOVA_ERR_INVALID if the map does not make sense (like size overflow) * - IOVA_ERR_NOMEM if tree cannot allocate more space. * - * It returns assignated iova in map->iova if return value is VHOST_DMA_MAP_OK. + * It returns an assigned IOVA in map->iova if the return value is IOVA_OK. */ -int vhost_iova_tree_map_alloc(VhostIOVATree *tree, DMAMap *map) +int vhost_iova_tree_map_alloc(VhostIOVATree *tree, DMAMap *map, hwaddr taddr) { + int ret; + /* Some vhost devices do not like addr 0. Skip first page */ hwaddr iova_first = tree->iova_first ?: qemu_real_host_page_size(); - if (map->translated_addr + map->size < map->translated_addr || - map->perm == IOMMU_NONE) { + if (taddr + map->size < taddr || map->perm == IOMMU_NONE) { return IOVA_ERR_INVALID; } - /* Allocate a node in IOVA address */ - return iova_tree_alloc_map(tree->iova_taddr_map, map, iova_first, - tree->iova_last); + /* Allocate a node in the IOVA-only tree */ + ret = iova_tree_alloc_map(tree->iova_map, map, iova_first, tree->iova_last); + if (unlikely(ret != IOVA_OK)) { + return ret; + } + + /* Insert a node in the IOVA->HVA tree */ + map->translated_addr = taddr; + return iova_tree_insert(tree->iova_taddr_map, map); } /** - * Remove existing mappings from iova tree + * Remove existing mappings from the IOVA-only and IOVA->HVA trees * - * @iova_tree: The vhost iova tree + * @iova_tree: The VhostIOVATree * @map: The map to remove */ void vhost_iova_tree_remove(VhostIOVATree *iova_tree, DMAMap map) { iova_tree_remove(iova_tree->iova_taddr_map, map); + iova_tree_remove(iova_tree->iova_map, map); +} + +/** + * Find the IOVA address stored from a guest memory address (GPA) + * + * @tree: The VhostIOVATree + * @map: The map with the guest memory address + * + * Returns the stored GPA->IOVA mapping, or NULL if not found. 
+ */ +const DMAMap *vhost_iova_tree_find_gpa(const VhostIOVATree *tree, + const DMAMap *map) +{ + return iova_tree_find_iova(tree->gpa_iova_map, map); +} + +/** + * Allocate a new IOVA range and add the mapping to the GPA->IOVA tree + * + * @tree: The VhostIOVATree + * @map: The IOVA mapping + * @taddr: The translated address (GPA) + * + * Returns: + * - IOVA_OK if the map fits both containers + * - IOVA_ERR_INVALID if the map does not make sense (like size overflow) + * - IOVA_ERR_NOMEM if the IOVA-only tree cannot allocate more space + * + * It returns an assigned IOVA in map->iova if the return value is IOVA_OK. + */ +int vhost_iova_tree_map_alloc_gpa(VhostIOVATree *tree, DMAMap *map, hwaddr taddr) +{ + int ret; + + /* Some vhost devices don't like addr 0. Skip first page */ + hwaddr iova_first = tree->iova_first ?: qemu_real_host_page_size(); + + if (taddr + map->size < taddr || map->perm == IOMMU_NONE) { + return IOVA_ERR_INVALID; + } + + /* Allocate a node in the IOVA-only tree */ + ret = iova_tree_alloc_map(tree->iova_map, map, iova_first, tree->iova_last); + if (unlikely(ret != IOVA_OK)) { + return ret; + } + + /* Insert a node in the GPA->IOVA tree */ + map->translated_addr = taddr; + return gpa_tree_insert(tree->gpa_iova_map, map); +} + +/** + * Remove existing mappings from the IOVA-only and GPA->IOVA trees + * + * @tree: The VhostIOVATree + * @map: The map to remove + */ +void vhost_iova_tree_remove_gpa(VhostIOVATree *iova_tree, DMAMap map) +{ + iova_tree_remove(iova_tree->gpa_iova_map, map); + iova_tree_remove(iova_tree->iova_map, map); } diff --git a/hw/virtio/vhost-iova-tree.h b/hw/virtio/vhost-iova-tree.h index 4adfd79..08f63b6 100644 --- a/hw/virtio/vhost-iova-tree.h +++ b/hw/virtio/vhost-iova-tree.h @@ -11,7 +11,7 @@ #define HW_VIRTIO_VHOST_IOVA_TREE_H #include "qemu/iova-tree.h" -#include "exec/memory.h" +#include "system/memory.h" typedef struct VhostIOVATree VhostIOVATree; @@ -21,7 +21,13 @@ G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostIOVATree, vhost_iova_tree_delete); const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *iova_tree, const DMAMap *map); -int vhost_iova_tree_map_alloc(VhostIOVATree *iova_tree, DMAMap *map); +int vhost_iova_tree_map_alloc(VhostIOVATree *iova_tree, DMAMap *map, + hwaddr taddr); void vhost_iova_tree_remove(VhostIOVATree *iova_tree, DMAMap map); +const DMAMap *vhost_iova_tree_find_gpa(const VhostIOVATree *iova_tree, + const DMAMap *map); +int vhost_iova_tree_map_alloc_gpa(VhostIOVATree *iova_tree, DMAMap *map, + hwaddr taddr); +void vhost_iova_tree_remove_gpa(VhostIOVATree *iova_tree, DMAMap map); #endif diff --git a/hw/virtio/vhost-scsi-pci.c b/hw/virtio/vhost-scsi-pci.c index 08980bc..7399ace 100644 --- a/hw/virtio/vhost-scsi-pci.c +++ b/hw/virtio/vhost-scsi-pci.c @@ -38,10 +38,9 @@ struct VHostSCSIPCI { VHostSCSI vdev; }; -static Property vhost_scsi_pci_properties[] = { +static const Property vhost_scsi_pci_properties[] = { DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, DEV_NVECTORS_UNSPECIFIED), - DEFINE_PROP_END_OF_LIST(), }; static void vhost_scsi_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) @@ -62,7 +61,7 @@ static void vhost_scsi_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) qdev_realize(vdev, BUS(&vpci_dev->bus), errp); } -static void vhost_scsi_pci_class_init(ObjectClass *klass, void *data) +static void vhost_scsi_pci_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); diff --git a/hw/virtio/vhost-shadow-virtqueue.c 
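[Note on the vhost-iova-tree rework above: VhostIOVATree now keeps an IOVA-only allocator tree plus two lookup trees, IOVA->HVA for host-only buffers such as SVQ vrings, and GPA->IOVA for guest-backed memory, with the translated address passed separately at allocation time. A hedged caller-side sketch using the functions from vhost-iova-tree.h above; map_guest_range is a hypothetical name and the inclusive-size convention is an assumption based on the DMAMap usage shown:]

/* Map a guest-memory range: reserve an IOVA window, then record the
 * GPA->IOVA translation in the same call. */
static int map_guest_range(VhostIOVATree *tree, hwaddr gpa, hwaddr size,
                           hwaddr *iova_out)
{
    DMAMap map = {
        .size = size - 1,   /* DMAMap ranges appear to be inclusive */
        .perm = IOMMU_RW,
    };
    int r = vhost_iova_tree_map_alloc_gpa(tree, &map, gpa);

    if (r != IOVA_OK) {
        return r;           /* IOVA_ERR_INVALID or IOVA_ERR_NOMEM */
    }
    *iova_out = map.iova;   /* assigned by the IOVA-only tree */
    return IOVA_OK;
}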
b/hw/virtio/vhost-shadow-virtqueue.c index fc5f408..2481d49 100644 --- a/hw/virtio/vhost-shadow-virtqueue.c +++ b/hw/virtio/vhost-shadow-virtqueue.c @@ -78,24 +78,39 @@ uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq) * @vaddr: Translated IOVA addresses * @iovec: Source qemu's VA addresses * @num: Length of iovec and minimum length of vaddr + * @gpas: Descriptors' GPAs, if backed by guest memory */ static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq, hwaddr *addrs, const struct iovec *iovec, - size_t num) + size_t num, const hwaddr *gpas) { if (num == 0) { return true; } for (size_t i = 0; i < num; ++i) { - DMAMap needle = { - .translated_addr = (hwaddr)(uintptr_t)iovec[i].iov_base, - .size = iovec[i].iov_len, - }; Int128 needle_last, map_last; size_t off; + const DMAMap *map; + DMAMap needle; + + /* Check if the descriptor is backed by guest memory */ + if (gpas) { + /* Search the GPA->IOVA tree */ + needle = (DMAMap) { + .translated_addr = gpas[i], + .size = iovec[i].iov_len, + }; + map = vhost_iova_tree_find_gpa(svq->iova_tree, &needle); + } else { + /* Search the IOVA->HVA tree */ + needle = (DMAMap) { + .translated_addr = (hwaddr)(uintptr_t)iovec[i].iov_base, + .size = iovec[i].iov_len, + }; + map = vhost_iova_tree_find_iova(svq->iova_tree, &needle); + } - const DMAMap *map = vhost_iova_tree_find_iova(svq->iova_tree, &needle); /* * Map cannot be NULL since iova map contains all guest space and * qemu already has a physical address mapped @@ -130,6 +145,7 @@ static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq, * @sg: Cache for hwaddr * @iovec: The iovec from the guest * @num: iovec length + * @addr: Descriptors' GPAs, if backed by guest memory * @more_descs: True if more descriptors come in the chain * @write: True if they are writeable descriptors * @@ -137,7 +153,8 @@ static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq, */ static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, const struct iovec *iovec, size_t num, - bool more_descs, bool write) + const hwaddr *addr, bool more_descs, + bool write) { uint16_t i = svq->free_head, last = svq->free_head; unsigned n; @@ -149,7 +166,7 @@ static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, return true; } - ok = vhost_svq_translate_addr(svq, sg, iovec, num); + ok = vhost_svq_translate_addr(svq, sg, iovec, num, addr); if (unlikely(!ok)) { return false; } @@ -165,17 +182,18 @@ static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, descs[i].len = cpu_to_le32(iovec[n].iov_len); last = i; - i = cpu_to_le16(svq->desc_next[i]); + i = svq->desc_next[i]; } - svq->free_head = le16_to_cpu(svq->desc_next[last]); + svq->free_head = svq->desc_next[last]; return true; } static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, const struct iovec *out_sg, size_t out_num, + const hwaddr *out_addr, const struct iovec *in_sg, size_t in_num, - unsigned *head) + const hwaddr *in_addr, unsigned *head) { unsigned avail_idx; vring_avail_t *avail = svq->vring.avail; @@ -191,13 +209,14 @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, return false; } - ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, in_num > 0, - false); + ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, out_addr, + in_num > 0, false); if (unlikely(!ok)) { return false; } - ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, false, true); + ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, in_addr, false, + 
true); if (unlikely(!ok)) { return false; } @@ -228,10 +247,12 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq) smp_mb(); if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) { - uint16_t avail_event = *(uint16_t *)(&svq->vring.used->ring[svq->vring.num]); + uint16_t avail_event = le16_to_cpu( + *(uint16_t *)(&svq->vring.used->ring[svq->vring.num])); needs_kick = vring_need_event(avail_event, svq->shadow_avail_idx, svq->shadow_avail_idx - 1); } else { - needs_kick = !(svq->vring.used->flags & VRING_USED_F_NO_NOTIFY); + needs_kick = + !(svq->vring.used->flags & cpu_to_le16(VRING_USED_F_NO_NOTIFY)); } if (!needs_kick) { @@ -247,8 +268,9 @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq) * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full */ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, - size_t out_num, const struct iovec *in_sg, size_t in_num, - VirtQueueElement *elem) + size_t out_num, const hwaddr *out_addr, + const struct iovec *in_sg, size_t in_num, + const hwaddr *in_addr, VirtQueueElement *elem) { unsigned qemu_head; unsigned ndescs = in_num + out_num; @@ -258,7 +280,8 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, return -ENOSPC; } - ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head); + ok = vhost_svq_add_split(svq, out_sg, out_num, out_addr, in_sg, in_num, + in_addr, &qemu_head); if (unlikely(!ok)) { return -EINVAL; } @@ -274,8 +297,8 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, static int vhost_svq_add_element(VhostShadowVirtqueue *svq, VirtQueueElement *elem) { - return vhost_svq_add(svq, elem->out_sg, elem->out_num, elem->in_sg, - elem->in_num, elem); + return vhost_svq_add(svq, elem->out_sg, elem->out_num, elem->out_addr, + elem->in_sg, elem->in_num, elem->in_addr, elem); } /** @@ -365,7 +388,7 @@ static bool vhost_svq_more_used(VhostShadowVirtqueue *svq) return true; } - svq->shadow_used_idx = cpu_to_le16(*(volatile uint16_t *)used_idx); + svq->shadow_used_idx = le16_to_cpu(*(volatile uint16_t *)used_idx); return svq->last_used_idx != svq->shadow_used_idx; } @@ -383,7 +406,7 @@ static bool vhost_svq_enable_notification(VhostShadowVirtqueue *svq) { if (virtio_vdev_has_feature(svq->vdev, VIRTIO_RING_F_EVENT_IDX)) { uint16_t *used_event = (uint16_t *)&svq->vring.avail->ring[svq->vring.num]; - *used_event = svq->shadow_used_idx; + *used_event = cpu_to_le16(svq->shadow_used_idx); } else { svq->vring.avail->flags &= ~cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT); } @@ -408,12 +431,13 @@ static uint16_t vhost_svq_last_desc_of_chain(const VhostShadowVirtqueue *svq, uint16_t num, uint16_t i) { for (uint16_t j = 0; j < (num - 1); ++j) { - i = le16_to_cpu(svq->desc_next[i]); + i = svq->desc_next[i]; } return i; } +G_GNUC_WARN_UNUSED_RESULT static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, uint32_t *len) { @@ -526,10 +550,11 @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq, size_t vhost_svq_poll(VhostShadowVirtqueue *svq, size_t num) { size_t len = 0; - uint32_t r; while (num--) { + g_autofree VirtQueueElement *elem = NULL; int64_t start_us = g_get_monotonic_time(); + uint32_t r = 0; do { if (vhost_svq_more_used(svq)) { @@ -541,7 +566,7 @@ size_t vhost_svq_poll(VhostShadowVirtqueue *svq, size_t num) } } while (true); - vhost_svq_get_buf(svq, &r); + elem = vhost_svq_get_buf(svq, &r); len += r; } @@ -681,7 +706,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, svq->desc_state = 
g_new0(SVQDescState, svq->vring.num); svq->desc_next = g_new0(uint16_t, svq->vring.num); for (unsigned i = 0; i < svq->vring.num - 1; i++) { - svq->desc_next[i] = cpu_to_le16(i + 1); + svq->desc_next[i] = i + 1; } } diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h index 19c842a..9c27373 100644 --- a/hw/virtio/vhost-shadow-virtqueue.h +++ b/hw/virtio/vhost-shadow-virtqueue.h @@ -118,8 +118,9 @@ uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq); void vhost_svq_push_elem(VhostShadowVirtqueue *svq, const VirtQueueElement *elem, uint32_t len); int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, - size_t out_num, const struct iovec *in_sg, size_t in_num, - VirtQueueElement *elem); + size_t out_num, const hwaddr *out_addr, + const struct iovec *in_sg, size_t in_num, + const hwaddr *in_addr, VirtQueueElement *elem); size_t vhost_svq_poll(VhostShadowVirtqueue *svq, size_t num); void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd); diff --git a/hw/virtio/vhost-user-base.c b/hw/virtio/vhost-user-base.c index a831671..ff67a02 100644 --- a/hw/virtio/vhost-user-base.c +++ b/hw/virtio/vhost-user-base.c @@ -66,7 +66,7 @@ err_host_notifiers: vhost_dev_disable_notifiers(&vub->vhost_dev, vdev); } -static void vub_stop(VirtIODevice *vdev) +static int vub_stop(VirtIODevice *vdev) { VHostUserBase *vub = VHOST_USER_BASE(vdev); BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); @@ -74,34 +74,39 @@ static void vub_stop(VirtIODevice *vdev) int ret; if (!k->set_guest_notifiers) { - return; + return 0; } - vhost_dev_stop(&vub->vhost_dev, vdev, true); + ret = vhost_dev_stop(&vub->vhost_dev, vdev, true); - ret = k->set_guest_notifiers(qbus->parent, vub->vhost_dev.nvqs, false); - if (ret < 0) { + if (k->set_guest_notifiers(qbus->parent, vub->vhost_dev.nvqs, false) < 0) { error_report("vhost guest notifier cleanup failed: %d", ret); - return; + return -1; } vhost_dev_disable_notifiers(&vub->vhost_dev, vdev); + return ret; } -static void vub_set_status(VirtIODevice *vdev, uint8_t status) +static int vub_set_status(VirtIODevice *vdev, uint8_t status) { VHostUserBase *vub = VHOST_USER_BASE(vdev); bool should_start = virtio_device_should_start(vdev, status); if (vhost_dev_is_started(&vub->vhost_dev) == should_start) { - return; + return 0; } if (should_start) { vub_start(vdev); } else { - vub_stop(vdev); + int ret; + ret = vub_stop(vdev); + if (ret < 0) { + return ret; + } } + return 0; } /* @@ -223,15 +228,18 @@ static void vub_disconnect(DeviceState *dev) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); VHostUserBase *vub = VHOST_USER_BASE(vdev); + struct vhost_virtqueue *vhost_vqs = vub->vhost_dev.vqs; if (!vub->connected) { - return; + goto done; } vub->connected = false; vub_stop(vdev); vhost_dev_cleanup(&vub->vhost_dev); + g_free(vhost_vqs); +done: /* Re-instate the event handler for new connections */ qemu_chr_fe_set_handlers(&vub->chardev, NULL, NULL, vub_event, @@ -254,7 +262,7 @@ static void vub_event(void *opaque, QEMUChrEvent event) case CHR_EVENT_CLOSED: /* defer close until later to avoid circular close */ vhost_user_async_close(dev, &vub->chardev, &vub->vhost_dev, - vub_disconnect, vub_event); + vub_disconnect); break; case CHR_EVENT_BREAK: case CHR_EVENT_MUX_IN: @@ -345,7 +353,7 @@ static void vub_device_unrealize(DeviceState *dev) do_vhost_user_cleanup(vdev, vub); } -static void vub_class_init(ObjectClass *klass, void *data) +static void vub_class_init(ObjectClass *klass, const void *data) { VirtioDeviceClass 
*vdc = VIRTIO_DEVICE_CLASS(klass); diff --git a/hw/virtio/vhost-user-blk-pci.c b/hw/virtio/vhost-user-blk-pci.c index eef8641..904369f 100644 --- a/hw/virtio/vhost-user-blk-pci.c +++ b/hw/virtio/vhost-user-blk-pci.c @@ -43,11 +43,10 @@ struct VHostUserBlkPCI { VHostUserBlk vdev; }; -static Property vhost_user_blk_pci_properties[] = { +static const Property vhost_user_blk_pci_properties[] = { DEFINE_PROP_UINT32("class", VirtIOPCIProxy, class_code, 0), DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, DEV_NVECTORS_UNSPECIFIED), - DEFINE_PROP_END_OF_LIST(), }; static void vhost_user_blk_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) @@ -66,7 +65,7 @@ static void vhost_user_blk_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) qdev_realize(vdev, BUS(&vpci_dev->bus), errp); } -static void vhost_user_blk_pci_class_init(ObjectClass *klass, void *data) +static void vhost_user_blk_pci_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); diff --git a/hw/virtio/vhost-user-device-pci.c b/hw/virtio/vhost-user-device-pci.c index efaf55d..f10bac8 100644 --- a/hw/virtio/vhost-user-device-pci.c +++ b/hw/virtio/vhost-user-device-pci.c @@ -31,7 +31,8 @@ static void vhost_user_device_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp qdev_realize(vdev, BUS(&vpci_dev->bus), errp); } -static void vhost_user_device_pci_class_init(ObjectClass *klass, void *data) +static void vhost_user_device_pci_class_init(ObjectClass *klass, + const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); diff --git a/hw/virtio/vhost-user-device.c b/hw/virtio/vhost-user-device.c index 67aa934..3939bdf 100644 --- a/hw/virtio/vhost-user-device.c +++ b/hw/virtio/vhost-user-device.c @@ -29,16 +29,15 @@ static const VMStateDescription vud_vmstate = { .unmigratable = 1, }; -static Property vud_properties[] = { +static const Property vud_properties[] = { DEFINE_PROP_CHR("chardev", VHostUserBase, chardev), DEFINE_PROP_UINT16("virtio-id", VHostUserBase, virtio_id, 0), DEFINE_PROP_UINT32("vq_size", VHostUserBase, vq_size, 64), DEFINE_PROP_UINT32("num_vqs", VHostUserBase, num_vqs, 1), DEFINE_PROP_UINT32("config_size", VHostUserBase, config_size, 0), - DEFINE_PROP_END_OF_LIST(), }; -static void vud_class_init(ObjectClass *klass, void *data) +static void vud_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); diff --git a/hw/virtio/vhost-user-fs-pci.c b/hw/virtio/vhost-user-fs-pci.c index 6829b8b..1490c11 100644 --- a/hw/virtio/vhost-user-fs-pci.c +++ b/hw/virtio/vhost-user-fs-pci.c @@ -29,10 +29,9 @@ typedef struct VHostUserFSPCI VHostUserFSPCI; DECLARE_INSTANCE_CHECKER(VHostUserFSPCI, VHOST_USER_FS_PCI, TYPE_VHOST_USER_FS_PCI) -static Property vhost_user_fs_pci_properties[] = { +static const Property vhost_user_fs_pci_properties[] = { DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, DEV_NVECTORS_UNSPECIFIED), - DEFINE_PROP_END_OF_LIST(), }; static void vhost_user_fs_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) @@ -48,7 +47,7 @@ static void vhost_user_fs_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) qdev_realize(vdev, BUS(&vpci_dev->bus), errp); } -static void vhost_user_fs_pci_class_init(ObjectClass *klass, void *data) +static void vhost_user_fs_pci_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c 
index cca2cd4..e77c69e 100644 --- a/hw/virtio/vhost-user-fs.c +++ b/hw/virtio/vhost-user-fs.c @@ -23,7 +23,7 @@ #include "hw/virtio/vhost.h" #include "hw/virtio/vhost-user-fs.h" #include "monitor/monitor.h" -#include "sysemu/sysemu.h" +#include "system/system.h" static const int user_feature_bits[] = { VIRTIO_F_VERSION_1, @@ -33,7 +33,8 @@ static const int user_feature_bits[] = { VIRTIO_F_RING_PACKED, VIRTIO_F_IOMMU_PLATFORM, VIRTIO_F_RING_RESET, - + VIRTIO_F_IN_ORDER, + VIRTIO_F_NOTIFICATION_DATA, VHOST_INVALID_FEATURE_BIT }; @@ -99,7 +100,7 @@ err_host_notifiers: vhost_dev_disable_notifiers(&fs->vhost_dev, vdev); } -static void vuf_stop(VirtIODevice *vdev) +static int vuf_stop(VirtIODevice *vdev) { VHostUserFS *fs = VHOST_USER_FS(vdev); BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); @@ -107,34 +108,39 @@ static void vuf_stop(VirtIODevice *vdev) int ret; if (!k->set_guest_notifiers) { - return; + return 0; } - vhost_dev_stop(&fs->vhost_dev, vdev, true); + ret = vhost_dev_stop(&fs->vhost_dev, vdev, true); - ret = k->set_guest_notifiers(qbus->parent, fs->vhost_dev.nvqs, false); - if (ret < 0) { + if (k->set_guest_notifiers(qbus->parent, fs->vhost_dev.nvqs, false) < 0) { error_report("vhost guest notifier cleanup failed: %d", ret); - return; + return -1; } vhost_dev_disable_notifiers(&fs->vhost_dev, vdev); + return ret; } -static void vuf_set_status(VirtIODevice *vdev, uint8_t status) +static int vuf_set_status(VirtIODevice *vdev, uint8_t status) { VHostUserFS *fs = VHOST_USER_FS(vdev); bool should_start = virtio_device_should_start(vdev, status); if (vhost_dev_is_started(&fs->vhost_dev) == should_start) { - return; + return 0; } if (should_start) { vuf_start(vdev); } else { - vuf_stop(vdev); + int ret; + ret = vuf_stop(vdev); + if (ret < 0) { + return ret; + } } + return 0; } static uint64_t vuf_get_features(VirtIODevice *vdev, @@ -266,7 +272,6 @@ err_virtio: g_free(fs->req_vqs); virtio_cleanup(vdev); g_free(fs->vhost_dev.vqs); - return; } static void vuf_device_unrealize(DeviceState *dev) @@ -402,13 +407,12 @@ static const VMStateDescription vuf_backend_vmstate = { }, }; -static Property vuf_properties[] = { +static const Property vuf_properties[] = { DEFINE_PROP_CHR("chardev", VHostUserFS, conf.chardev), DEFINE_PROP_STRING("tag", VHostUserFS, conf.tag), DEFINE_PROP_UINT16("num-request-queues", VHostUserFS, conf.num_request_queues, 1), DEFINE_PROP_UINT16("queue-size", VHostUserFS, conf.queue_size, 128), - DEFINE_PROP_END_OF_LIST(), }; static void vuf_instance_init(Object *obj) @@ -419,7 +423,7 @@ static void vuf_instance_init(Object *obj) "/filesystem@0", DEVICE(obj)); } -static void vuf_class_init(ObjectClass *klass, void *data) +static void vuf_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); diff --git a/hw/virtio/vhost-user-gpio-pci.c b/hw/virtio/vhost-user-gpio-pci.c index b3028a2..9b165b5 100644 --- a/hw/virtio/vhost-user-gpio-pci.c +++ b/hw/virtio/vhost-user-gpio-pci.c @@ -32,7 +32,7 @@ static void vhost_user_gpio_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) qdev_realize(vdev, BUS(&vpci_dev->bus), errp); } -static void vhost_user_gpio_pci_class_init(ObjectClass *klass, void *data) +static void vhost_user_gpio_pci_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); diff --git a/hw/virtio/vhost-user-gpio.c b/hw/virtio/vhost-user-gpio.c index 9f37c25..a7fd49b 100644 --- 
a/hw/virtio/vhost-user-gpio.c +++ b/hw/virtio/vhost-user-gpio.c @@ -14,9 +14,8 @@ #include "standard-headers/linux/virtio_ids.h" #include "standard-headers/linux/virtio_gpio.h" -static Property vgpio_properties[] = { +static const Property vgpio_properties[] = { DEFINE_PROP_CHR("chardev", VHostUserBase, chardev), - DEFINE_PROP_END_OF_LIST(), }; static void vgpio_realize(DeviceState *dev, Error **errp) @@ -37,7 +36,7 @@ static const VMStateDescription vu_gpio_vmstate = { .unmigratable = 1, }; -static void vu_gpio_class_init(ObjectClass *klass, void *data) +static void vu_gpio_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VHostUserBaseClass *vubc = VHOST_USER_BASE_CLASS(klass); diff --git a/hw/virtio/vhost-user-i2c-pci.c b/hw/virtio/vhost-user-i2c-pci.c index 00ac109..692cd66 100644 --- a/hw/virtio/vhost-user-i2c-pci.c +++ b/hw/virtio/vhost-user-i2c-pci.c @@ -32,7 +32,7 @@ static void vhost_user_i2c_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) qdev_realize(vdev, BUS(&vpci_dev->bus), errp); } -static void vhost_user_i2c_pci_class_init(ObjectClass *klass, void *data) +static void vhost_user_i2c_pci_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); diff --git a/hw/virtio/vhost-user-i2c.c b/hw/virtio/vhost-user-i2c.c index a464f5e..ae007fe 100644 --- a/hw/virtio/vhost-user-i2c.c +++ b/hw/virtio/vhost-user-i2c.c @@ -14,9 +14,8 @@ #include "qemu/error-report.h" #include "standard-headers/linux/virtio_ids.h" -static Property vi2c_properties[] = { +static const Property vi2c_properties[] = { DEFINE_PROP_CHR("chardev", VHostUserBase, chardev), - DEFINE_PROP_END_OF_LIST(), }; static void vi2c_realize(DeviceState *dev, Error **errp) @@ -37,7 +36,7 @@ static const VMStateDescription vu_i2c_vmstate = { .unmigratable = 1, }; -static void vu_i2c_class_init(ObjectClass *klass, void *data) +static void vu_i2c_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VHostUserBaseClass *vubc = VHOST_USER_BASE_CLASS(klass); diff --git a/hw/virtio/vhost-user-input.c b/hw/virtio/vhost-user-input.c index bedec04..5cfc5bb 100644 --- a/hw/virtio/vhost-user-input.c +++ b/hw/virtio/vhost-user-input.c @@ -7,9 +7,8 @@ #include "qemu/osdep.h" #include "hw/virtio/virtio-input.h" -static Property vinput_properties[] = { +static const Property vinput_properties[] = { DEFINE_PROP_CHR("chardev", VHostUserBase, chardev), - DEFINE_PROP_END_OF_LIST(), }; static void vinput_realize(DeviceState *dev, Error **errp) @@ -31,7 +30,7 @@ static const VMStateDescription vmstate_vhost_input = { .unmigratable = 1, }; -static void vhost_input_class_init(ObjectClass *klass, void *data) +static void vhost_input_class_init(ObjectClass *klass, const void *data) { VHostUserBaseClass *vubc = VHOST_USER_BASE_CLASS(klass); DeviceClass *dc = DEVICE_CLASS(klass); diff --git a/hw/virtio/vhost-user-rng-pci.c b/hw/virtio/vhost-user-rng-pci.c index f649354..9f45fc6 100644 --- a/hw/virtio/vhost-user-rng-pci.c +++ b/hw/virtio/vhost-user-rng-pci.c @@ -23,10 +23,9 @@ typedef struct VHostUserRNGPCI VHostUserRNGPCI; DECLARE_INSTANCE_CHECKER(VHostUserRNGPCI, VHOST_USER_RNG_PCI, TYPE_VHOST_USER_RNG_PCI) -static Property vhost_user_rng_pci_properties[] = { +static const Property vhost_user_rng_pci_properties[] = { DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, DEV_NVECTORS_UNSPECIFIED), - DEFINE_PROP_END_OF_LIST(), }; static void vhost_user_rng_pci_realize(VirtIOPCIProxy *vpci_dev, Error 
**errp) @@ -41,7 +40,7 @@ static void vhost_user_rng_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) qdev_realize(vdev, BUS(&vpci_dev->bus), errp); } -static void vhost_user_rng_pci_class_init(ObjectClass *klass, void *data) +static void vhost_user_rng_pci_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); diff --git a/hw/virtio/vhost-user-rng.c b/hw/virtio/vhost-user-rng.c index 01879c8..61dadcd 100644 --- a/hw/virtio/vhost-user-rng.c +++ b/hw/virtio/vhost-user-rng.c @@ -20,9 +20,8 @@ static const VMStateDescription vu_rng_vmstate = { .unmigratable = 1, }; -static Property vrng_properties[] = { +static const Property vrng_properties[] = { DEFINE_PROP_CHR("chardev", VHostUserBase, chardev), - DEFINE_PROP_END_OF_LIST(), }; static void vu_rng_base_realize(DeviceState *dev, Error **errp) @@ -38,7 +37,7 @@ static void vu_rng_base_realize(DeviceState *dev, Error **errp) vubs->parent_realize(dev, errp); } -static void vu_rng_class_init(ObjectClass *klass, void *data) +static void vu_rng_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VHostUserBaseClass *vubc = VHOST_USER_BASE_CLASS(klass); diff --git a/hw/virtio/vhost-user-scmi-pci.c b/hw/virtio/vhost-user-scmi-pci.c index 7f53af7..0ab56a5 100644 --- a/hw/virtio/vhost-user-scmi-pci.c +++ b/hw/virtio/vhost-user-scmi-pci.c @@ -31,7 +31,7 @@ static void vhost_user_scmi_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) qdev_realize(vdev, BUS(&vpci_dev->bus), errp); } -static void vhost_user_scmi_pci_class_init(ObjectClass *klass, void *data) +static void vhost_user_scmi_pci_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); diff --git a/hw/virtio/vhost-user-scmi.c b/hw/virtio/vhost-user-scmi.c index 300847e..f9264c4 100644 --- a/hw/virtio/vhost-user-scmi.c +++ b/hw/virtio/vhost-user-scmi.c @@ -83,7 +83,7 @@ err_host_notifiers: return ret; } -static void vu_scmi_stop(VirtIODevice *vdev) +static int vu_scmi_stop(VirtIODevice *vdev) { VHostUserSCMI *scmi = VHOST_USER_SCMI(vdev); BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); @@ -93,41 +93,46 @@ static void vu_scmi_stop(VirtIODevice *vdev) /* vhost_dev_is_started() check in the callers is not fully reliable. 
*/ if (!scmi->started_vu) { - return; + return 0; } scmi->started_vu = false; if (!k->set_guest_notifiers) { - return; + return 0; } - vhost_dev_stop(vhost_dev, vdev, true); + ret = vhost_dev_stop(vhost_dev, vdev, true); - ret = k->set_guest_notifiers(qbus->parent, vhost_dev->nvqs, false); - if (ret < 0) { + if (k->set_guest_notifiers(qbus->parent, vhost_dev->nvqs, false) < 0) { error_report("vhost guest notifier cleanup failed: %d", ret); - return; + return -1; } vhost_dev_disable_notifiers(vhost_dev, vdev); + return ret; } -static void vu_scmi_set_status(VirtIODevice *vdev, uint8_t status) +static int vu_scmi_set_status(VirtIODevice *vdev, uint8_t status) { VHostUserSCMI *scmi = VHOST_USER_SCMI(vdev); bool should_start = virtio_device_should_start(vdev, status); if (!scmi->connected) { - return; + return -1; } if (vhost_dev_is_started(&scmi->vhost_dev) == should_start) { - return; + return 0; } if (should_start) { vu_scmi_start(vdev); } else { - vu_scmi_stop(vdev); + int ret; + ret = vu_scmi_stop(vdev); + if (ret < 0) { + return ret; + } } + return 0; } static uint64_t vu_scmi_get_features(VirtIODevice *vdev, uint64_t features, @@ -258,8 +263,6 @@ static void vu_scmi_device_realize(DeviceState *dev, Error **errp) qemu_chr_fe_set_handlers(&scmi->chardev, NULL, NULL, vu_scmi_event, NULL, dev, NULL, true); - - return; } static void vu_scmi_device_unrealize(DeviceState *dev) @@ -277,12 +280,11 @@ static const VMStateDescription vu_scmi_vmstate = { .unmigratable = 1, }; -static Property vu_scmi_properties[] = { +static const Property vu_scmi_properties[] = { DEFINE_PROP_CHR("chardev", VHostUserSCMI, chardev), - DEFINE_PROP_END_OF_LIST(), }; -static void vu_scmi_class_init(ObjectClass *klass, void *data) +static void vu_scmi_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); diff --git a/hw/virtio/vhost-user-scsi-pci.c b/hw/virtio/vhost-user-scsi-pci.c index 75882e3..994e51a 100644 --- a/hw/virtio/vhost-user-scsi-pci.c +++ b/hw/virtio/vhost-user-scsi-pci.c @@ -29,7 +29,7 @@ #include "hw/pci/msi.h" #include "hw/pci/msix.h" #include "hw/loader.h" -#include "sysemu/kvm.h" +#include "system/kvm.h" #include "hw/virtio/virtio-pci.h" #include "qom/object.h" @@ -44,10 +44,9 @@ struct VHostUserSCSIPCI { VHostUserSCSI vdev; }; -static Property vhost_user_scsi_pci_properties[] = { +static const Property vhost_user_scsi_pci_properties[] = { DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, DEV_NVECTORS_UNSPECIFIED), - DEFINE_PROP_END_OF_LIST(), }; static void vhost_user_scsi_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) @@ -68,7 +67,7 @@ static void vhost_user_scsi_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) qdev_realize(vdev, BUS(&vpci_dev->bus), errp); } -static void vhost_user_scsi_pci_class_init(ObjectClass *klass, void *data) +static void vhost_user_scsi_pci_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); diff --git a/hw/virtio/vhost-user-snd-pci.c b/hw/virtio/vhost-user-snd-pci.c index d61cfda..f5015fb 100644 --- a/hw/virtio/vhost-user-snd-pci.c +++ b/hw/virtio/vhost-user-snd-pci.c @@ -23,10 +23,6 @@ typedef struct VHostUserSoundPCI VHostUserSoundPCI; DECLARE_INSTANCE_CHECKER(VHostUserSoundPCI, VHOST_USER_SND_PCI, TYPE_VHOST_USER_SND_PCI) -static Property vhost_user_snd_pci_properties[] = { - DEFINE_PROP_END_OF_LIST(), -}; - static void vhost_user_snd_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) { 
VHostUserSoundPCI *dev = VHOST_USER_SND_PCI(vpci_dev); @@ -37,14 +33,13 @@ static void vhost_user_snd_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) qdev_realize(vdev, BUS(&vpci_dev->bus), errp); } -static void vhost_user_snd_pci_class_init(ObjectClass *klass, void *data) +static void vhost_user_snd_pci_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass); k->realize = vhost_user_snd_pci_realize; set_bit(DEVICE_CATEGORY_SOUND, dc->categories); - device_class_set_props(dc, vhost_user_snd_pci_properties); pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET; pcidev_k->device_id = 0; /* Set by virtio-pci based on virtio id */ pcidev_k->revision = 0x00; diff --git a/hw/virtio/vhost-user-snd.c b/hw/virtio/vhost-user-snd.c index 9a21754..732411c 100644 --- a/hw/virtio/vhost-user-snd.c +++ b/hw/virtio/vhost-user-snd.c @@ -16,30 +16,45 @@ #include "standard-headers/linux/virtio_ids.h" #include "standard-headers/linux/virtio_snd.h" +static const VirtIOFeature feature_sizes[] = { + {.flags = 1ULL << VIRTIO_SND_F_CTLS, + .end = endof(struct virtio_snd_config, controls)}, + {} +}; + +static const VirtIOConfigSizeParams cfg_size_params = { + .min_size = endof(struct virtio_snd_config, chmaps), + .max_size = sizeof(struct virtio_snd_config), + .feature_sizes = feature_sizes +}; + static const VMStateDescription vu_snd_vmstate = { .name = "vhost-user-snd", .unmigratable = 1, }; -static Property vsnd_properties[] = { +static const Property vsnd_properties[] = { DEFINE_PROP_CHR("chardev", VHostUserBase, chardev), - DEFINE_PROP_END_OF_LIST(), + DEFINE_PROP_BIT64("controls", VHostUserBase, + parent_obj.host_features, VIRTIO_SND_F_CTLS, false), }; static void vu_snd_base_realize(DeviceState *dev, Error **errp) { VHostUserBase *vub = VHOST_USER_BASE(dev); VHostUserBaseClass *vubs = VHOST_USER_BASE_GET_CLASS(dev); + VirtIODevice *vdev = &vub->parent_obj; vub->virtio_id = VIRTIO_ID_SOUND; vub->num_vqs = 4; - vub->config_size = sizeof(struct virtio_snd_config); + vub->config_size = virtio_get_config_size(&cfg_size_params, + vdev->host_features); vub->vq_size = 64; vubs->parent_realize(dev, errp); } -static void vu_snd_class_init(ObjectClass *klass, void *data) +static void vu_snd_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VHostUserBaseClass *vubc = VHOST_USER_BASE_CLASS(klass); diff --git a/hw/virtio/vhost-user-vsock-pci.c b/hw/virtio/vhost-user-vsock-pci.c index e5a86e8..adb877b 100644 --- a/hw/virtio/vhost-user-vsock-pci.c +++ b/hw/virtio/vhost-user-vsock-pci.c @@ -31,9 +31,8 @@ struct VHostUserVSockPCI { /* vhost-user-vsock-pci */ -static Property vhost_user_vsock_pci_properties[] = { +static const Property vhost_user_vsock_pci_properties[] = { DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 3), - DEFINE_PROP_END_OF_LIST(), }; static void vhost_user_vsock_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) @@ -47,7 +46,8 @@ static void vhost_user_vsock_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) qdev_realize(vdev, BUS(&vpci_dev->bus), errp); } -static void vhost_user_vsock_pci_class_init(ObjectClass *klass, void *data) +static void vhost_user_vsock_pci_class_init(ObjectClass *klass, + const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); diff --git a/hw/virtio/vhost-user-vsock.c b/hw/virtio/vhost-user-vsock.c index 9431b97..993c287 100644 --- 
a/hw/virtio/vhost-user-vsock.c +++ b/hw/virtio/vhost-user-vsock.c @@ -21,6 +21,8 @@ static const int user_feature_bits[] = { VIRTIO_RING_F_INDIRECT_DESC, VIRTIO_RING_F_EVENT_IDX, VIRTIO_F_NOTIFY_ON_EMPTY, + VIRTIO_F_IN_ORDER, + VIRTIO_F_NOTIFICATION_DATA, VHOST_INVALID_FEATURE_BIT }; @@ -52,23 +54,28 @@ const VhostDevConfigOps vsock_ops = { .vhost_dev_config_notifier = vuv_handle_config_change, }; -static void vuv_set_status(VirtIODevice *vdev, uint8_t status) +static int vuv_set_status(VirtIODevice *vdev, uint8_t status) { VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); bool should_start = virtio_device_should_start(vdev, status); + int ret; if (vhost_dev_is_started(&vvc->vhost_dev) == should_start) { - return; + return 0; } if (should_start) { - int ret = vhost_vsock_common_start(vdev); + ret = vhost_vsock_common_start(vdev); if (ret < 0) { - return; + return ret; } } else { - vhost_vsock_common_stop(vdev); + ret = vhost_vsock_common_stop(vdev); + if (ret < 0) { + return ret; + } } + return 0; } static uint64_t vuv_get_features(VirtIODevice *vdev, @@ -126,7 +133,6 @@ err_vhost_dev: err_virtio: vhost_vsock_common_unrealize(vdev); vhost_user_cleanup(&vsock->vhost_user); - return; } static void vuv_device_unrealize(DeviceState *dev) @@ -146,12 +152,11 @@ static void vuv_device_unrealize(DeviceState *dev) } -static Property vuv_properties[] = { +static const Property vuv_properties[] = { DEFINE_PROP_CHR("chardev", VHostUserVSock, conf.chardev), - DEFINE_PROP_END_OF_LIST(), }; -static void vuv_class_init(ObjectClass *klass, void *data) +static void vuv_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index cdf9af4..1e1d6b0 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -19,16 +19,16 @@ #include "hw/virtio/virtio-net.h" #include "chardev/char-fe.h" #include "io/channel-socket.h" -#include "sysemu/kvm.h" +#include "system/kvm.h" #include "qemu/error-report.h" #include "qemu/main-loop.h" #include "qemu/uuid.h" #include "qemu/sockets.h" -#include "sysemu/runstate.h" -#include "sysemu/cryptodev.h" +#include "system/runstate.h" +#include "system/cryptodev.h" #include "migration/postcopy-ram.h" #include "trace.h" -#include "exec/ramblock.h" +#include "system/ramblock.h" #include <sys/ioctl.h> #include <sys/socket.h> @@ -371,6 +371,7 @@ static bool vhost_user_per_device_request(VhostUserRequest request) case VHOST_USER_RESET_DEVICE: case VHOST_USER_ADD_MEM_REG: case VHOST_USER_REM_MEM_REG: + case VHOST_USER_SET_LOG_BASE: return true; default: return false; @@ -653,8 +654,6 @@ static void scrub_shadow_regions(struct vhost_dev *dev, } *nr_rem_reg = rm_idx; *nr_add_reg = add_idx; - - return; } static int send_remove_regions(struct vhost_dev *dev, @@ -1184,9 +1183,16 @@ static int vhost_user_set_vring_num(struct vhost_dev *dev, static void vhost_user_host_notifier_free(VhostUserHostNotifier *n) { - assert(n && n->unmap_addr); - munmap(n->unmap_addr, qemu_real_host_page_size()); - n->unmap_addr = NULL; + if (n->unmap_addr) { + munmap(n->unmap_addr, qemu_real_host_page_size()); + n->unmap_addr = NULL; + } + if (n->destroy) { + memory_region_transaction_begin(); + object_unparent(OBJECT(&n->mr)); + memory_region_transaction_commit(); + g_free(n); + } } /* @@ -1194,17 +1200,28 @@ static void vhost_user_host_notifier_free(VhostUserHostNotifier *n) * under rcu. 
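
The destroy flag threaded through vhost_user_host_notifier_remove()/_free() above lets one deferred callback serve both the remap case (unmap only) and final teardown (unmap and free the wrapper). A simplified synchronous sketch; QEMU defers notifier_free() through call_rcu(), which is elided here:

#include <stdbool.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

struct notifier {
    void *addr;        /* currently mapped page, if any */
    void *unmap_addr;  /* page queued for unmapping */
    bool destroy;      /* free the wrapper itself as well? */
};

static void notifier_free(struct notifier *n)
{
    if (n->unmap_addr) {
        munmap(n->unmap_addr, (size_t)sysconf(_SC_PAGESIZE));
        n->unmap_addr = NULL;
    }
    if (n->destroy) {
        free(n);       /* final teardown, not just a remap */
    }
}

static void notifier_remove(struct notifier *n, bool destroy)
{
    if (!n || (!destroy && !n->addr)) {
        return;        /* nothing mapped and not a final teardown */
    }
    if (n->addr) {
        n->unmap_addr = n->addr;
        n->addr = NULL;
    }
    n->destroy = destroy;
    notifier_free(n);  /* QEMU schedules this via RCU instead */
}

int main(void)
{
    struct notifier *n = calloc(1, sizeof(*n));
    notifier_remove(n, true);
    return 0;
}
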
*/ static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n, - VirtIODevice *vdev) + VirtIODevice *vdev, bool destroy) { + /* + * if destroy == false and n->addr == NULL, we have nothing to do. + * so, just return. + */ + if (!n || (!destroy && !n->addr)) { + return; + } + if (n->addr) { if (vdev) { + memory_region_transaction_begin(); virtio_queue_set_host_notifier_mr(vdev, n->idx, &n->mr, false); + memory_region_transaction_commit(); } assert(!n->unmap_addr); n->unmap_addr = n->addr; n->addr = NULL; - call_rcu(n, vhost_user_host_notifier_free, rcu); } + n->destroy = destroy; + call_rcu(n, vhost_user_host_notifier_free, rcu); } static int vhost_user_set_vring_base(struct vhost_dev *dev, @@ -1278,9 +1295,7 @@ static int vhost_user_get_vring_base(struct vhost_dev *dev, struct vhost_user *u = dev->opaque; VhostUserHostNotifier *n = fetch_notifier(u->user, ring->index); - if (n) { - vhost_user_host_notifier_remove(n, dev->vdev); - } + vhost_user_host_notifier_remove(n, dev->vdev, false); ret = vhost_user_write(dev, &msg, NULL, 0); if (ret < 0) { @@ -1561,7 +1576,7 @@ static int vhost_user_backend_handle_vring_host_notifier(struct vhost_dev *dev, * new mapped address. */ n = fetch_or_create_notifier(user, queue_idx); - vhost_user_host_notifier_remove(n, vdev); + vhost_user_host_notifier_remove(n, vdev, false); if (area->u64 & VHOST_USER_VRING_NOFD_MASK) { return 0; @@ -1606,9 +1621,14 @@ vhost_user_backend_handle_shared_object_add(struct vhost_dev *dev, QemuUUID uuid; memcpy(uuid.data, object->uuid, sizeof(object->uuid)); - return virtio_add_vhost_device(&uuid, dev); + return !virtio_add_vhost_device(&uuid, dev); } +/* + * Handle VHOST_USER_BACKEND_SHARED_OBJECT_REMOVE backend requests. + * + * Return: 0 on success, 1 on error. + */ static int vhost_user_backend_handle_shared_object_remove(struct vhost_dev *dev, VhostUserShared *object) @@ -1622,16 +1642,16 @@ vhost_user_backend_handle_shared_object_remove(struct vhost_dev *dev, struct vhost_dev *owner = virtio_lookup_vhost_device(&uuid); if (dev != owner) { /* Not allowed to remove non-owned entries */ - return 0; + return 1; } break; } default: /* Not allowed to remove non-owned entries */ - return 0; + return 1; } - return virtio_remove_resource(&uuid); + return !virtio_remove_resource(&uuid); } static bool vhost_user_send_resp(QIOChannel *ioc, VhostUserHeader *hdr, @@ -2735,15 +2755,7 @@ static int vhost_user_set_inflight_fd(struct vhost_dev *dev, static void vhost_user_state_destroy(gpointer data) { VhostUserHostNotifier *n = (VhostUserHostNotifier *) data; - if (n) { - vhost_user_host_notifier_remove(n, NULL); - object_unparent(OBJECT(&n->mr)); - /* - * We can't free until vhost_user_host_notifier_remove has - * done it's thing so schedule the free with RCU. 
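
The shared-object handlers above were changed to return 0 on success and 1 on error, while the virtio_add_vhost_device()/virtio_remove_resource() helpers return a success boolean, hence the `!` at the call sites. A toy illustration with made-up names:

#include <stdbool.h>
#include <stdio.h>

static bool table_remove(int key)      /* true on success */
{
    return key == 42;
}

/* Backend request handler convention: 0 == success, 1 == error. */
static int handle_remove(int key, int owner, int requester)
{
    if (owner != requester) {
        return 1;      /* not allowed to remove non-owned entries */
    }
    return !table_remove(key);
}

int main(void)
{
    printf("%d %d\n", handle_remove(42, 1, 1), handle_remove(42, 1, 2));
    return 0;
}
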
- */ - g_free_rcu(n, rcu); - } + vhost_user_host_notifier_remove(n, NULL, true); } bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp) @@ -2764,9 +2776,7 @@ void vhost_user_cleanup(VhostUserState *user) if (!user->chr) { return; } - memory_region_transaction_begin(); user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers, true); - memory_region_transaction_commit(); user->chr = NULL; } @@ -2776,25 +2786,13 @@ typedef struct { DeviceState *dev; CharBackend *cd; struct vhost_dev *vhost; - IOEventHandler *event_cb; } VhostAsyncCallback; static void vhost_user_async_close_bh(void *opaque) { VhostAsyncCallback *data = opaque; - struct vhost_dev *vhost = data->vhost; - /* - * If the vhost_dev has been cleared in the meantime there is - * nothing left to do as some other path has completed the - * cleanup. - */ - if (vhost->vdev) { - data->cb(data->dev); - } else if (data->event_cb) { - qemu_chr_fe_set_handlers(data->cd, NULL, NULL, data->event_cb, - NULL, data->dev, NULL, true); - } + data->cb(data->dev); g_free(data); } @@ -2806,8 +2804,7 @@ static void vhost_user_async_close_bh(void *opaque) */ void vhost_user_async_close(DeviceState *d, CharBackend *chardev, struct vhost_dev *vhost, - vu_async_close_fn cb, - IOEventHandler *event_cb) + vu_async_close_fn cb) { if (!runstate_check(RUN_STATE_SHUTDOWN)) { /* @@ -2823,7 +2820,6 @@ void vhost_user_async_close(DeviceState *d, data->dev = d; data->cd = chardev; data->vhost = vhost; - data->event_cb = event_cb; /* Disable any further notifications on the chardev */ qemu_chr_fe_set_handlers(chardev, diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 3cdaa12..7061b6e 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -20,7 +20,7 @@ #include "hw/virtio/virtio-net.h" #include "hw/virtio/vhost-shadow-virtqueue.h" #include "hw/virtio/vhost-vdpa.h" -#include "exec/address-spaces.h" +#include "system/address-spaces.h" #include "migration/blocker.h" #include "qemu/cutils.h" #include "qemu/main-loop.h" @@ -209,6 +209,8 @@ static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) int ret; Int128 llend; Error *local_err = NULL; + MemoryRegion *mr; + hwaddr xlat; if (iotlb->target_as != &address_space_memory) { error_report("Wrong target AS \"%s\", only system memory is allowed", @@ -228,11 +230,14 @@ static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) { bool read_only; - if (!memory_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, NULL, - &local_err)) { + mr = memory_translate_iotlb(iotlb, &xlat, &local_err); + if (!mr) { error_report_err(local_err); return; } + vaddr = memory_region_get_ram_ptr(mr) + xlat; + read_only = !(iotlb->perm & IOMMU_WO) || mr->readonly; + ret = vhost_vdpa_dma_map(s, VHOST_VDPA_GUEST_PA_ASID, iova, iotlb->addr_mask + 1, vaddr, read_only); if (ret) { @@ -288,8 +293,6 @@ static void vhost_vdpa_iommu_region_add(MemoryListener *listener, QLIST_INSERT_HEAD(&s->iommu_list, iommu, iommu_next); memory_region_iommu_replay(iommu->iommu_mr, &iommu->n); - - return; } static void vhost_vdpa_iommu_region_del(MemoryListener *listener, @@ -360,14 +363,20 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, llsize = int128_sub(llend, int128_make64(iova)); if (s->shadow_data) { int r; + hwaddr gpa = section->offset_within_address_space; - mem_region.translated_addr = (hwaddr)(uintptr_t)vaddr, mem_region.size = int128_get64(llsize) - 1, mem_region.perm = IOMMU_ACCESS_FLAG(true, 
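
The vhost_vdpa_iommu_map_notify() change above splits translation into two steps: resolve the IOTLB entry to a memory region plus offset, then derive the host pointer and the effective write permission. A self-contained sketch with stand-in types (IOMMU_WO is an assumed bit value here, not the real constant):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define IOMMU_WO 2  /* assumed permission bit for the example */

struct region { uint8_t *ram; bool readonly; };

struct tlb_entry { struct region *mr; uint64_t xlat; unsigned perm; };

static void *entry_vaddr(const struct tlb_entry *e, bool *read_only)
{
    /* host virtual address backing the guest mapping */
    void *vaddr = e->mr->ram + e->xlat;

    /* read-only if the IOMMU forbids writes or the region itself does */
    *read_only = !(e->perm & IOMMU_WO) || e->mr->readonly;
    return vaddr;
}

int main(void)
{
    static uint8_t ram[4096];
    struct region r = { ram, false };
    struct tlb_entry e = { &r, 128, IOMMU_WO };
    bool ro;
    printf("%p ro=%d\n", entry_vaddr(&e, &ro), ro);
    return 0;
}
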
section->readonly), - r = vhost_iova_tree_map_alloc(s->iova_tree, &mem_region); + r = vhost_iova_tree_map_alloc_gpa(s->iova_tree, &mem_region, gpa); if (unlikely(r != IOVA_OK)) { error_report("Can't allocate a mapping (%d)", r); + + if (mem_region.translated_addr == gpa) { + error_report("Insertion to GPA->IOVA tree failed"); + /* Remove the mapping from the IOVA-only tree */ + goto fail_map; + } goto fail; } @@ -386,7 +395,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, fail_map: if (s->shadow_data) { - vhost_iova_tree_remove(s->iova_tree, mem_region); + vhost_iova_tree_remove_gpa(s->iova_tree, mem_region); } fail: @@ -440,21 +449,18 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, if (s->shadow_data) { const DMAMap *result; - const void *vaddr = memory_region_get_ram_ptr(section->mr) + - section->offset_within_region + - (iova - section->offset_within_address_space); DMAMap mem_region = { - .translated_addr = (hwaddr)(uintptr_t)vaddr, + .translated_addr = section->offset_within_address_space, .size = int128_get64(llsize) - 1, }; - result = vhost_iova_tree_find_iova(s->iova_tree, &mem_region); + result = vhost_iova_tree_find_gpa(s->iova_tree, &mem_region); if (!result) { /* The memory listener map wasn't mapped */ return; } iova = result->iova; - vhost_iova_tree_remove(s->iova_tree, *result); + vhost_iova_tree_remove_gpa(s->iova_tree, *result); } vhost_vdpa_iotlb_batch_begin_once(s); /* @@ -593,6 +599,36 @@ static void vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v) v->shadow_vqs = g_steal_pointer(&shadow_vqs); } +static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) +{ + struct vhost_vdpa *v = dev->opaque; + + uint64_t features; + uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 | + 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH | + 0x1ULL << VHOST_BACKEND_F_IOTLB_ASID | + 0x1ULL << VHOST_BACKEND_F_SUSPEND; + int r; + + if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) { + return -EFAULT; + } + + features &= f; + + if (vhost_vdpa_first_dev(dev)) { + r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features); + if (r) { + return -EFAULT; + } + } + + dev->backend_cap = features; + v->shared->backend_cap = features; + + return 0; +} + static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) { struct vhost_vdpa *v = opaque; @@ -602,7 +638,12 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) v->dev = dev; dev->opaque = opaque ; - v->shared->listener = vhost_vdpa_memory_listener; + + ret = vhost_vdpa_set_backend_cap(dev); + if (unlikely(ret != 0)) { + return ret; + } + vhost_vdpa_init_svq(dev, v); error_propagate(&dev->migration_blocker, v->migration_blocker); @@ -638,6 +679,7 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER); + v->shared->listener = vhost_vdpa_memory_listener; return 0; } @@ -840,36 +882,6 @@ static int vhost_vdpa_set_features(struct vhost_dev *dev, return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK); } -static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) -{ - struct vhost_vdpa *v = dev->opaque; - - uint64_t features; - uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 | - 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH | - 0x1ULL << VHOST_BACKEND_F_IOTLB_ASID | - 0x1ULL << VHOST_BACKEND_F_SUSPEND; - int r; - - if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) { - return -EFAULT; - } - - features &= f; 
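
vhost_iova_tree_map_alloc_gpa() above can fail after the IOVA-only insertion already succeeded, which is why the error path inspects translated_addr to decide whether a rollback is needed. A sketch of that allocate-then-rollback shape, with hypothetical tree helpers:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool iova_alloc(uint64_t *iova)
{
    *iova = 0x1000;
    return true;
}

static bool gpa_tree_insert(uint64_t gpa, uint64_t iova)
{
    (void)gpa; (void)iova;
    return false;              /* pretend a duplicate GPA was found */
}

static void iova_free(uint64_t iova)
{
    (void)iova;
}

static int map_alloc_gpa(uint64_t gpa, uint64_t *iova)
{
    if (!iova_alloc(iova)) {
        return -1;             /* nothing allocated yet */
    }
    if (!gpa_tree_insert(gpa, *iova)) {
        iova_free(*iova);      /* roll back the IOVA-only insertion */
        return -2;
    }
    return 0;
}

int main(void)
{
    uint64_t iova;
    printf("%d\n", map_alloc_gpa(0x40000000, &iova));
    return 0;
}
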
- - if (vhost_vdpa_first_dev(dev)) { - r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features); - if (r) { - return -EFAULT; - } - } - - dev->backend_cap = features; - v->shared->backend_cap = features; - - return 0; -} - static int vhost_vdpa_get_device_id(struct vhost_dev *dev, uint32_t *device_id) { @@ -887,8 +899,14 @@ static int vhost_vdpa_reset_device(struct vhost_dev *dev) ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status); trace_vhost_vdpa_reset_device(dev); + if (ret) { + return ret; + } + + memory_listener_unregister(&v->shared->listener); + v->shared->listener_registered = false; v->suspended = false; - return ret; + return 0; } static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx) @@ -1142,16 +1160,23 @@ static void vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev, * * @v: Vhost-vdpa device * @needle: The area to search iova + * @taddr: The translated address (HVA) * @errorp: Error pointer */ static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle, - Error **errp) + hwaddr taddr, Error **errp) { int r; - r = vhost_iova_tree_map_alloc(v->shared->iova_tree, needle); + r = vhost_iova_tree_map_alloc(v->shared->iova_tree, needle, taddr); if (unlikely(r != IOVA_OK)) { error_setg(errp, "Cannot allocate iova (%d)", r); + + if (needle->translated_addr == taddr) { + error_append_hint(errp, "Insertion to IOVA->HVA tree failed"); + /* Remove the mapping from the IOVA-only tree */ + vhost_iova_tree_remove(v->shared->iova_tree, *needle); + } return false; } @@ -1192,11 +1217,11 @@ static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev, vhost_svq_get_vring_addr(svq, &svq_addr); driver_region = (DMAMap) { - .translated_addr = svq_addr.desc_user_addr, .size = driver_size - 1, .perm = IOMMU_RO, }; - ok = vhost_vdpa_svq_map_ring(v, &driver_region, errp); + ok = vhost_vdpa_svq_map_ring(v, &driver_region, svq_addr.desc_user_addr, + errp); if (unlikely(!ok)) { error_prepend(errp, "Cannot create vq driver region: "); return false; @@ -1206,11 +1231,11 @@ static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev, addr->avail_user_addr = driver_region.iova + avail_offset; device_region = (DMAMap) { - .translated_addr = svq_addr.used_user_addr, .size = device_size - 1, .perm = IOMMU_RW, }; - ok = vhost_vdpa_svq_map_ring(v, &device_region, errp); + ok = vhost_vdpa_svq_map_ring(v, &device_region, svq_addr.used_user_addr, + errp); if (unlikely(!ok)) { error_prepend(errp, "Cannot create vq device region: "); vhost_vdpa_svq_unmap_ring(v, driver_region.translated_addr); @@ -1365,7 +1390,15 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) "IOMMU and try again"); return -1; } - memory_listener_register(&v->shared->listener, dev->vdev->dma_as); + if (v->shared->listener_registered && + dev->vdev->dma_as != v->shared->listener.address_space) { + memory_listener_unregister(&v->shared->listener); + v->shared->listener_registered = false; + } + if (!v->shared->listener_registered) { + memory_listener_register(&v->shared->listener, dev->vdev->dma_as); + v->shared->listener_registered = true; + } return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); } @@ -1375,8 +1408,6 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) static void vhost_vdpa_reset_status(struct vhost_dev *dev) { - struct vhost_vdpa *v = dev->opaque; - if (!vhost_vdpa_last_dev(dev)) { return; } @@ -1384,7 +1415,6 @@ static void vhost_vdpa_reset_status(struct vhost_dev *dev) vhost_vdpa_reset_device(dev); vhost_vdpa_add_status(dev, 
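
The listener_registered bookkeeping above registers the memory listener lazily and re-registers it only when the device's DMA address space has changed (the vIOMMU case). A standalone sketch of that guard, with an int standing in for the address space:

#include <stdbool.h>
#include <stdio.h>

struct listener { bool registered; int as; };

static void do_register(struct listener *l, int as)
{
    l->as = as;
    l->registered = true;
    printf("register as=%d\n", as);
}

static void do_unregister(struct listener *l)
{
    l->registered = false;
    printf("unregister\n");
}

static void ensure_listener(struct listener *l, int as)
{
    if (l->registered && l->as != as) {
        do_unregister(l);      /* vIOMMU switched the DMA AS */
    }
    if (!l->registered) {
        do_register(l, as);
    }
}

int main(void)
{
    struct listener l = { false, 0 };
    ensure_listener(&l, 1);    /* first start: registers */
    ensure_listener(&l, 1);    /* same AS: no-op */
    ensure_listener(&l, 2);    /* AS changed: re-registers */
    return 0;
}
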
VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER); - memory_listener_unregister(&v->shared->listener); } static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base, @@ -1518,12 +1548,27 @@ static int vhost_vdpa_get_features(struct vhost_dev *dev, static int vhost_vdpa_set_owner(struct vhost_dev *dev) { + int r; + struct vhost_vdpa *v; + if (!vhost_vdpa_first_dev(dev)) { return 0; } trace_vhost_vdpa_set_owner(dev); - return vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL); + r = vhost_vdpa_call(dev, VHOST_SET_OWNER, NULL); + if (unlikely(r < 0)) { + return r; + } + + /* + * Being optimistic and listening address space memory. If the device + * uses vIOMMU, it is changed at vhost_vdpa_dev_start. + */ + v = dev->opaque; + memory_listener_register(&v->shared->listener, &address_space_memory); + v->shared->listener_registered = true; + return 0; } static int vhost_vdpa_vq_get_addr(struct vhost_dev *dev, @@ -1555,7 +1600,6 @@ const VhostOps vdpa_ops = { .vhost_set_vring_kick = vhost_vdpa_set_vring_kick, .vhost_set_vring_call = vhost_vdpa_set_vring_call, .vhost_get_features = vhost_vdpa_get_features, - .vhost_set_backend_cap = vhost_vdpa_set_backend_cap, .vhost_set_owner = vhost_vdpa_set_owner, .vhost_set_vring_endian = NULL, .vhost_backend_memslots_limit = vhost_vdpa_memslots_limit, diff --git a/hw/virtio/vhost-vsock-common.c b/hw/virtio/vhost-vsock-common.c index 12ea87d..c6c44d8 100644 --- a/hw/virtio/vhost-vsock-common.c +++ b/hw/virtio/vhost-vsock-common.c @@ -22,6 +22,7 @@ const int feature_bits[] = { VIRTIO_VSOCK_F_SEQPACKET, VIRTIO_F_RING_RESET, + VIRTIO_F_RING_PACKED, VHOST_INVALID_FEATURE_BIT }; @@ -94,7 +95,7 @@ err_host_notifiers: return ret; } -void vhost_vsock_common_stop(VirtIODevice *vdev) +int vhost_vsock_common_stop(VirtIODevice *vdev) { VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); @@ -102,18 +103,18 @@ void vhost_vsock_common_stop(VirtIODevice *vdev) int ret; if (!k->set_guest_notifiers) { - return; + return 0; } - vhost_dev_stop(&vvc->vhost_dev, vdev, true); + ret = vhost_dev_stop(&vvc->vhost_dev, vdev, true); - ret = k->set_guest_notifiers(qbus->parent, vvc->vhost_dev.nvqs, false); - if (ret < 0) { + if (k->set_guest_notifiers(qbus->parent, vvc->vhost_dev.nvqs, false) < 0) { error_report("vhost guest notifier cleanup failed: %d", ret); - return; + return -1; } vhost_dev_disable_notifiers(&vvc->vhost_dev, vdev); + return ret; } @@ -284,13 +285,12 @@ static struct vhost_dev *vhost_vsock_common_get_vhost(VirtIODevice *vdev) return &vvc->vhost_dev; } -static Property vhost_vsock_common_properties[] = { +static const Property vhost_vsock_common_properties[] = { DEFINE_PROP_ON_OFF_AUTO("seqpacket", VHostVSockCommon, seqpacket, ON_OFF_AUTO_AUTO), - DEFINE_PROP_END_OF_LIST(), }; -static void vhost_vsock_common_class_init(ObjectClass *klass, void *data) +static void vhost_vsock_common_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); diff --git a/hw/virtio/vhost-vsock-pci.c b/hw/virtio/vhost-vsock-pci.c index 9f34414..0022a71 100644 --- a/hw/virtio/vhost-vsock-pci.c +++ b/hw/virtio/vhost-vsock-pci.c @@ -35,9 +35,8 @@ struct VHostVSockPCI { /* vhost-vsock-pci */ -static Property vhost_vsock_pci_properties[] = { +static const Property vhost_vsock_pci_properties[] = { DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 3), - DEFINE_PROP_END_OF_LIST(), }; static void vhost_vsock_pci_realize(VirtIOPCIProxy *vpci_dev, 
Error **errp) @@ -57,7 +56,7 @@ static void vhost_vsock_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) qdev_realize(vdev, BUS(&vpci_dev->bus), errp); } -static void vhost_vsock_pci_class_init(ObjectClass *klass, void *data) +static void vhost_vsock_pci_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); diff --git a/hw/virtio/vhost-vsock.c b/hw/virtio/vhost-vsock.c index 3d4a5a9..6e40888 100644 --- a/hw/virtio/vhost-vsock.c +++ b/hw/virtio/vhost-vsock.c @@ -67,37 +67,38 @@ static int vhost_vsock_set_running(VirtIODevice *vdev, int start) } -static void vhost_vsock_set_status(VirtIODevice *vdev, uint8_t status) +static int vhost_vsock_set_status(VirtIODevice *vdev, uint8_t status) { VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); bool should_start = virtio_device_should_start(vdev, status); int ret; if (vhost_dev_is_started(&vvc->vhost_dev) == should_start) { - return; + return 0; } if (should_start) { ret = vhost_vsock_common_start(vdev); if (ret < 0) { - return; + return 0; } ret = vhost_vsock_set_running(vdev, 1); if (ret < 0) { vhost_vsock_common_stop(vdev); error_report("Error starting vhost vsock: %d", -ret); - return; + return 0; } } else { ret = vhost_vsock_set_running(vdev, 0); if (ret < 0) { error_report("vhost vsock set running failed: %d", ret); - return; + return 0; } vhost_vsock_common_stop(vdev); } + return 0; } static uint64_t vhost_vsock_get_features(VirtIODevice *vdev, @@ -205,13 +206,12 @@ static void vhost_vsock_device_unrealize(DeviceState *dev) vhost_vsock_common_unrealize(vdev); } -static Property vhost_vsock_properties[] = { +static const Property vhost_vsock_properties[] = { DEFINE_PROP_UINT64("guest-cid", VHostVSock, conf.guest_cid, 0), DEFINE_PROP_STRING("vhostfd", VHostVSock, conf.vhostfd), - DEFINE_PROP_END_OF_LIST(), }; -static void vhost_vsock_class_init(ObjectClass *klass, void *data) +static void vhost_vsock_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 4acd77e..fc43853 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -26,7 +26,7 @@ #include "hw/mem/memory-device.h" #include "migration/blocker.h" #include "migration/qemu-file-types.h" -#include "sysemu/dma.h" +#include "system/dma.h" #include "trace.h" /* enabled until disconnected backend stabilizes */ @@ -43,8 +43,9 @@ do { } while (0) #endif -static struct vhost_log *vhost_log; -static struct vhost_log *vhost_log_shm; +static struct vhost_log *vhost_log[VHOST_BACKEND_TYPE_MAX]; +static struct vhost_log *vhost_log_shm[VHOST_BACKEND_TYPE_MAX]; +static QLIST_HEAD(, vhost_dev) vhost_log_devs[VHOST_BACKEND_TYPE_MAX]; /* Memslots used by backends that support private memslots (without an fd). 
*/ static unsigned int used_memslots; @@ -149,6 +150,47 @@ bool vhost_dev_has_iommu(struct vhost_dev *dev) } } +static inline bool vhost_dev_should_log(struct vhost_dev *dev) +{ + assert(dev->vhost_ops); + assert(dev->vhost_ops->backend_type > VHOST_BACKEND_TYPE_NONE); + assert(dev->vhost_ops->backend_type < VHOST_BACKEND_TYPE_MAX); + + return dev == QLIST_FIRST(&vhost_log_devs[dev->vhost_ops->backend_type]); +} + +static inline void vhost_dev_elect_mem_logger(struct vhost_dev *hdev, bool add) +{ + VhostBackendType backend_type; + + assert(hdev->vhost_ops); + + backend_type = hdev->vhost_ops->backend_type; + assert(backend_type > VHOST_BACKEND_TYPE_NONE); + assert(backend_type < VHOST_BACKEND_TYPE_MAX); + + if (add && !QLIST_IS_INSERTED(hdev, logdev_entry)) { + if (QLIST_EMPTY(&vhost_log_devs[backend_type])) { + QLIST_INSERT_HEAD(&vhost_log_devs[backend_type], + hdev, logdev_entry); + } else { + /* + * The first vhost_device in the list is selected as the shared + * logger to scan memory sections. Put new entry next to the head + * to avoid inadvertent change to the underlying logger device. + * This is done in order to get better cache locality and to avoid + * performance churn on the hot path for log scanning. Even when + * new devices come and go quickly, it wouldn't end up changing + * the active leading logger device at all. + */ + QLIST_INSERT_AFTER(QLIST_FIRST(&vhost_log_devs[backend_type]), + hdev, logdev_entry); + } + } else if (!add && QLIST_IS_INSERTED(hdev, logdev_entry)) { + QLIST_REMOVE(hdev, logdev_entry); + } +} + static int vhost_sync_dirty_bitmap(struct vhost_dev *dev, MemoryRegionSection *section, hwaddr first, @@ -166,12 +208,14 @@ static int vhost_sync_dirty_bitmap(struct vhost_dev *dev, start_addr = MAX(first, start_addr); end_addr = MIN(last, end_addr); - for (i = 0; i < dev->mem->nregions; ++i) { - struct vhost_memory_region *reg = dev->mem->regions + i; - vhost_dev_sync_region(dev, section, start_addr, end_addr, - reg->guest_phys_addr, - range_get_last(reg->guest_phys_addr, - reg->memory_size)); + if (vhost_dev_should_log(dev)) { + for (i = 0; i < dev->mem->nregions; ++i) { + struct vhost_memory_region *reg = dev->mem->regions + i; + vhost_dev_sync_region(dev, section, start_addr, end_addr, + reg->guest_phys_addr, + range_get_last(reg->guest_phys_addr, + reg->memory_size)); + } } for (i = 0; i < dev->nvqs; ++i) { struct vhost_virtqueue *vq = dev->vqs + i; @@ -287,6 +331,10 @@ static int vhost_set_backend_type(struct vhost_dev *dev, r = -1; } + if (r == 0) { + assert(dev->vhost_ops->backend_type == backend_type); + } + return r; } @@ -319,16 +367,22 @@ static struct vhost_log *vhost_log_alloc(uint64_t size, bool share) return log; } -static struct vhost_log *vhost_log_get(uint64_t size, bool share) +static struct vhost_log *vhost_log_get(VhostBackendType backend_type, + uint64_t size, bool share) { - struct vhost_log *log = share ? vhost_log_shm : vhost_log; + struct vhost_log *log; + + assert(backend_type > VHOST_BACKEND_TYPE_NONE); + assert(backend_type < VHOST_BACKEND_TYPE_MAX); + + log = share ? 
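
vhost_dev_elect_mem_logger() above keeps one device per backend type responsible for scanning memory sections; newcomers are inserted after the head so the elected logger stays stable. A sketch using <sys/queue.h> lists in place of QEMU's QLIST:

#include <stdbool.h>
#include <stdio.h>
#include <sys/queue.h>

struct dev {
    const char *name;
    bool on_list;
    LIST_ENTRY(dev) entry;
};

static LIST_HEAD(, dev) log_devs = LIST_HEAD_INITIALIZER(log_devs);

static bool should_log(struct dev *d)
{
    return d == LIST_FIRST(&log_devs);   /* only the elected head logs */
}

static void elect(struct dev *d, bool add)
{
    if (add && !d->on_list) {
        if (LIST_EMPTY(&log_devs)) {
            LIST_INSERT_HEAD(&log_devs, d, entry);
        } else {
            /* keep the current leader stable */
            LIST_INSERT_AFTER(LIST_FIRST(&log_devs), d, entry);
        }
        d->on_list = true;
    } else if (!add && d->on_list) {
        LIST_REMOVE(d, entry);
        d->on_list = false;
    }
}

int main(void)
{
    struct dev a = { "a", false }, b = { "b", false };
    elect(&a, true);
    elect(&b, true);           /* "a" remains the logger */
    printf("%s logs\n", should_log(&a) ? a.name : b.name);
    return 0;
}
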
vhost_log_shm[backend_type] : vhost_log[backend_type]; if (!log || log->size != size) { log = vhost_log_alloc(size, share); if (share) { - vhost_log_shm = log; + vhost_log_shm[backend_type] = log; } else { - vhost_log = log; + vhost_log[backend_type] = log; } } else { ++log->refcnt; @@ -340,11 +394,20 @@ static struct vhost_log *vhost_log_get(uint64_t size, bool share) static void vhost_log_put(struct vhost_dev *dev, bool sync) { struct vhost_log *log = dev->log; + VhostBackendType backend_type; if (!log) { return; } + assert(dev->vhost_ops); + backend_type = dev->vhost_ops->backend_type; + + if (backend_type == VHOST_BACKEND_TYPE_NONE || + backend_type >= VHOST_BACKEND_TYPE_MAX) { + return; + } + --log->refcnt; if (log->refcnt == 0) { /* Sync only the range covered by the old log */ @@ -352,18 +415,19 @@ static void vhost_log_put(struct vhost_dev *dev, bool sync) vhost_log_sync_range(dev, 0, dev->log_size * VHOST_LOG_CHUNK - 1); } - if (vhost_log == log) { + if (vhost_log[backend_type] == log) { g_free(log->log); - vhost_log = NULL; - } else if (vhost_log_shm == log) { + vhost_log[backend_type] = NULL; + } else if (vhost_log_shm[backend_type] == log) { qemu_memfd_free(log->log, log->size * sizeof(*(log->log)), log->fd); - vhost_log_shm = NULL; + vhost_log_shm[backend_type] = NULL; } g_free(log); } + vhost_dev_elect_mem_logger(dev, false); dev->log = NULL; dev->log_size = 0; } @@ -376,7 +440,8 @@ static bool vhost_dev_log_is_shared(struct vhost_dev *dev) static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size) { - struct vhost_log *log = vhost_log_get(size, vhost_dev_log_is_shared(dev)); + struct vhost_log *log = vhost_log_get(dev->vhost_ops->backend_type, + size, vhost_dev_log_is_shared(dev)); uint64_t log_base = (uintptr_t)log->log; int r; @@ -667,7 +732,6 @@ out: memory_region_unref(old_sections[n_old_sections].mr); } g_free(old_sections); - return; } /* Adds the section data to the tmp_section structure. @@ -978,6 +1042,15 @@ static int vhost_dev_set_log(struct vhost_dev *dev, bool enable_log) goto err_vq; } } + + /* + * At log start we select our vhost_device logger that will scan the + * memory sections and skip for the others. This is possible because + * the log is shared amongst all vhost devices for a given type of + * backend. 
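
vhost_log_get()/vhost_log_put() above become per-backend-type: one shared, refcounted log per type, reallocated only when the requested size changes. A reduced sketch of that cache, with hypothetical stand-ins for the QEMU helpers:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define BACKEND_MAX 3

struct log { uint64_t size; unsigned refcnt; };

static struct log *logs[BACKEND_MAX];

static struct log *log_get(int type, uint64_t size)
{
    struct log *log = logs[type];

    if (!log || log->size != size) {
        log = calloc(1, sizeof(*log));   /* fresh log, refcnt 1 */
        log->size = size;
        log->refcnt = 1;
        logs[type] = log;
    } else {
        ++log->refcnt;                   /* share the existing one */
    }
    return log;
}

static void log_put(int type, struct log *log)
{
    if (--log->refcnt == 0) {
        if (logs[type] == log) {
            logs[type] = NULL;           /* drop the cached slot */
        }
        free(log);
    }
}

int main(void)
{
    struct log *a = log_get(0, 4096), *b = log_get(0, 4096);
    printf("shared=%d\n", a == b);
    log_put(0, b);
    log_put(0, a);
    return 0;
}
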
+ */ + vhost_dev_elect_mem_logger(dev, enable_log); + return 0; err_vq: for (; i >= 0; --i) { @@ -1294,10 +1367,10 @@ fail_alloc_desc: return r; } -void vhost_virtqueue_stop(struct vhost_dev *dev, - struct VirtIODevice *vdev, - struct vhost_virtqueue *vq, - unsigned idx) +int vhost_virtqueue_stop(struct vhost_dev *dev, + struct VirtIODevice *vdev, + struct vhost_virtqueue *vq, + unsigned idx) { int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx); struct vhost_vring_state state = { @@ -1307,7 +1380,7 @@ void vhost_virtqueue_stop(struct vhost_dev *dev, if (virtio_queue_get_desc_addr(vdev, idx) == 0) { /* Don't stop the virtqueue which might have not been started */ - return; + return 0; } r = dev->vhost_ops->vhost_get_vring_base(dev, &state); @@ -1338,6 +1411,7 @@ void vhost_virtqueue_stop(struct vhost_dev *dev, 0, virtio_queue_get_avail_size(vdev, idx)); vhost_memory_unmap(dev, vq->desc, virtio_queue_get_desc_size(vdev, idx), 0, virtio_queue_get_desc_size(vdev, idx)); + return r; } static int vhost_virtqueue_set_busyloop_timeout(struct vhost_dev *dev, @@ -1608,9 +1682,9 @@ void vhost_dev_cleanup(struct vhost_dev *hdev) memset(hdev, 0, sizeof(struct vhost_dev)); } -static void vhost_dev_disable_notifiers_nvqs(struct vhost_dev *hdev, - VirtIODevice *vdev, - unsigned int nvqs) +void vhost_dev_disable_notifiers_nvqs(struct vhost_dev *hdev, + VirtIODevice *vdev, + unsigned int nvqs) { BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); int i, r; @@ -1856,62 +1930,6 @@ void vhost_dev_free_inflight(struct vhost_inflight *inflight) } } -static int vhost_dev_resize_inflight(struct vhost_inflight *inflight, - uint64_t new_size) -{ - Error *err = NULL; - int fd = -1; - void *addr = qemu_memfd_alloc("vhost-inflight", new_size, - F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL, - &fd, &err); - - if (err) { - error_report_err(err); - return -ENOMEM; - } - - vhost_dev_free_inflight(inflight); - inflight->offset = 0; - inflight->addr = addr; - inflight->fd = fd; - inflight->size = new_size; - - return 0; -} - -void vhost_dev_save_inflight(struct vhost_inflight *inflight, QEMUFile *f) -{ - if (inflight->addr) { - qemu_put_be64(f, inflight->size); - qemu_put_be16(f, inflight->queue_size); - qemu_put_buffer(f, inflight->addr, inflight->size); - } else { - qemu_put_be64(f, 0); - } -} - -int vhost_dev_load_inflight(struct vhost_inflight *inflight, QEMUFile *f) -{ - uint64_t size; - - size = qemu_get_be64(f); - if (!size) { - return 0; - } - - if (inflight->size != size) { - int ret = vhost_dev_resize_inflight(inflight, size); - if (ret < 0) { - return ret; - } - } - inflight->queue_size = qemu_get_be16(f); - - qemu_get_buffer(f, inflight->addr, size); - - return 0; -} - int vhost_dev_prepare_inflight(struct vhost_dev *hdev, VirtIODevice *vdev) { int r; @@ -2044,7 +2062,8 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) uint64_t log_base; hdev->log_size = vhost_get_log_size(hdev); - hdev->log = vhost_log_get(hdev->log_size, + hdev->log = vhost_log_get(hdev->vhost_ops->backend_type, + hdev->log_size, vhost_dev_log_is_shared(hdev)); log_base = (uintptr_t)hdev->log->log; r = hdev->vhost_ops->vhost_set_log_base(hdev, @@ -2054,6 +2073,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) VHOST_OPS_DEBUG(r, "vhost_set_log_base failed"); goto fail_log; } + vhost_dev_elect_mem_logger(hdev, true); } if (vrings) { r = vhost_dev_set_vring_enable(hdev, true); @@ -2075,11 +2095,22 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool 
vrings) * vhost-kernel code requires for this.*/ for (i = 0; i < hdev->nvqs; ++i) { struct vhost_virtqueue *vq = hdev->vqs + i; - vhost_device_iotlb_miss(hdev, vq->used_phys, true); + r = vhost_device_iotlb_miss(hdev, vq->used_phys, true); + if (r) { + goto fail_iotlb; + } } } vhost_start_config_intr(hdev); return 0; +fail_iotlb: + if (vhost_dev_has_iommu(hdev) && + hdev->vhost_ops->vhost_set_iotlb_callback) { + hdev->vhost_ops->vhost_set_iotlb_callback(hdev, false); + } + if (hdev->vhost_ops->vhost_dev_start) { + hdev->vhost_ops->vhost_dev_start(hdev, false); + } fail_start: if (vrings) { vhost_dev_set_vring_enable(hdev, false); @@ -2105,9 +2136,10 @@ fail_features: } /* Host notifiers must be enabled at this point. */ -void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) +int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) { int i; + int rc = 0; /* should only be called after backend is connected */ assert(hdev->vhost_ops); @@ -2126,10 +2158,10 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) vhost_dev_set_vring_enable(hdev, false); } for (i = 0; i < hdev->nvqs; ++i) { - vhost_virtqueue_stop(hdev, - vdev, - hdev->vqs + i, - hdev->vq_index + i); + rc |= vhost_virtqueue_stop(hdev, + vdev, + hdev->vqs + i, + hdev->vq_index + i); } if (hdev->vhost_ops->vhost_reset_status) { hdev->vhost_ops->vhost_reset_status(hdev); @@ -2146,6 +2178,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) hdev->started = false; vdev->vhost_started = false; hdev->vdev = NULL; + return rc; } int vhost_net_set_backend(struct vhost_dev *hdev, diff --git a/hw/virtio/virtio-9p-pci.c b/hw/virtio/virtio-9p-pci.c index 94c14f0..594742f 100644 --- a/hw/virtio/virtio-9p-pci.c +++ b/hw/virtio/virtio-9p-pci.c @@ -43,14 +43,13 @@ static void virtio_9p_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) qdev_realize(vdev, BUS(&vpci_dev->bus), errp); } -static Property virtio_9p_pci_properties[] = { +static const Property virtio_9p_pci_properties[] = { DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true), DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2), - DEFINE_PROP_END_OF_LIST(), }; -static void virtio_9p_pci_class_init(ObjectClass *klass, void *data) +static void virtio_9p_pci_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass); diff --git a/hw/virtio/virtio-acpi.c b/hw/virtio/virtio-acpi.c index 230a669..85becef 100644 --- a/hw/virtio/virtio-acpi.c +++ b/hw/virtio/virtio-acpi.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0+ +// SPDX-License-Identifier: GPL-2.0-or-later /* * virtio ACPI Support * diff --git a/hw/virtio/virtio-balloon-pci.c b/hw/virtio/virtio-balloon-pci.c index ce2645b..96e88b6 100644 --- a/hw/virtio/virtio-balloon-pci.c +++ b/hw/virtio/virtio-balloon-pci.c @@ -35,16 +35,27 @@ struct VirtIOBalloonPCI { VirtIOBalloon vdev; }; +static const Property virtio_balloon_properties[] = { + DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags, + VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true), + DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, + DEV_NVECTORS_UNSPECIFIED), +}; + static void virtio_balloon_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) { VirtIOBalloonPCI *dev = VIRTIO_BALLOON_PCI(vpci_dev); DeviceState *vdev = DEVICE(&dev->vdev); + if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) { + vpci_dev->nvectors = 2; + } + vpci_dev->class_code = 
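
The new fail_iotlb label above extends vhost_dev_start()'s unwind chain: each label undoes exactly the steps that succeeded before the failure, in reverse order. The classic shape of that pattern, with hypothetical step functions:

#include <stdio.h>

static int set_features(void)  { return 0; }
static int start_backend(void) { return 0; }
static int prime_iotlb(void)   { return -1; }  /* pretend this fails */
static void stop_backend(void)   { puts("stop backend"); }
static void clear_features(void) { puts("clear features"); }

static int dev_start(void)
{
    int r = set_features();
    if (r < 0) {
        goto fail;
    }
    r = start_backend();
    if (r < 0) {
        goto fail_features;
    }
    r = prime_iotlb();
    if (r < 0) {
        goto fail_start;       /* undo everything done so far */
    }
    return 0;

fail_start:
    stop_backend();
fail_features:
    clear_features();
fail:
    return r;
}

int main(void)
{
    return dev_start() < 0;
}
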
PCI_CLASS_OTHERS; qdev_realize(vdev, BUS(&vpci_dev->bus), errp); } -static void virtio_balloon_pci_class_init(ObjectClass *klass, void *data) +static void virtio_balloon_pci_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); @@ -55,6 +66,7 @@ static void virtio_balloon_pci_class_init(ObjectClass *klass, void *data) pcidev_k->device_id = PCI_DEVICE_ID_VIRTIO_BALLOON; pcidev_k->revision = VIRTIO_PCI_ABI_VERSION; pcidev_k->class_id = PCI_CLASS_OTHERS; + device_class_set_props(dc, virtio_balloon_properties); } static void virtio_balloon_pci_instance_init(Object *obj) diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index 609e39a..db787d0 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -22,16 +22,16 @@ #include "hw/mem/pc-dimm.h" #include "hw/qdev-properties.h" #include "hw/boards.h" -#include "sysemu/balloon.h" +#include "system/balloon.h" #include "hw/virtio/virtio-balloon.h" -#include "exec/address-spaces.h" +#include "system/address-spaces.h" #include "qapi/error.h" #include "qapi/qapi-events-machine.h" #include "qapi/visitor.h" #include "trace.h" #include "qemu/error-report.h" #include "migration/misc.h" - +#include "system/reset.h" #include "hw/virtio/virtio-bus.h" #include "hw/virtio/virtio-access.h" @@ -167,19 +167,33 @@ static void balloon_deflate_page(VirtIOBalloon *balloon, } } +/* + * All stats upto VIRTIO_BALLOON_S_NR /must/ have a + * non-NULL name declared here, since these are used + * as keys for populating the QDict with stats + */ static const char *balloon_stat_names[] = { [VIRTIO_BALLOON_S_SWAP_IN] = "stat-swap-in", [VIRTIO_BALLOON_S_SWAP_OUT] = "stat-swap-out", [VIRTIO_BALLOON_S_MAJFLT] = "stat-major-faults", [VIRTIO_BALLOON_S_MINFLT] = "stat-minor-faults", [VIRTIO_BALLOON_S_MEMFREE] = "stat-free-memory", + [VIRTIO_BALLOON_S_MEMTOT] = "stat-total-memory", [VIRTIO_BALLOON_S_AVAIL] = "stat-available-memory", [VIRTIO_BALLOON_S_CACHES] = "stat-disk-caches", [VIRTIO_BALLOON_S_HTLB_PGALLOC] = "stat-htlb-pgalloc", [VIRTIO_BALLOON_S_HTLB_PGFAIL] = "stat-htlb-pgfail", - [VIRTIO_BALLOON_S_NR] = NULL + + [VIRTIO_BALLOON_S_OOM_KILL] = "stat-oom-kills", + [VIRTIO_BALLOON_S_ALLOC_STALL] = "stat-alloc-stalls", + [VIRTIO_BALLOON_S_ASYNC_SCAN] = "stat-async-scans", + [VIRTIO_BALLOON_S_DIRECT_SCAN] = "stat-direct-scans", + [VIRTIO_BALLOON_S_ASYNC_RECLAIM] = "stat-async-reclaims", + + [VIRTIO_BALLOON_S_DIRECT_RECLAIM] = "stat-direct-reclaims", }; +G_STATIC_ASSERT(G_N_ELEMENTS(balloon_stat_names) == VIRTIO_BALLOON_S_NR); /* * reset_stats - Mark all items in the stats array as unset @@ -896,6 +910,8 @@ static void virtio_balloon_device_realize(DeviceState *dev, Error **errp) } reset_stats(s); + s->stats_last_update = 0; + qemu_register_resettable(OBJECT(dev)); } static void virtio_balloon_device_unrealize(DeviceState *dev) @@ -903,6 +919,7 @@ static void virtio_balloon_device_unrealize(DeviceState *dev) VirtIODevice *vdev = VIRTIO_DEVICE(dev); VirtIOBalloon *s = VIRTIO_BALLOON(dev); + qemu_unregister_resettable(OBJECT(dev)); if (s->free_page_bh) { qemu_bh_delete(s->free_page_bh); object_unref(OBJECT(s->iothread)); @@ -941,7 +958,7 @@ static void virtio_balloon_device_reset(VirtIODevice *vdev) s->poison_val = 0; } -static void virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status) +static int virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status) { VirtIOBalloon *s = VIRTIO_BALLOON(vdev); @@ -971,6 +988,28 @@ static void 
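
The G_STATIC_ASSERT added above ties balloon_stat_names[] to VIRTIO_BALLOON_S_NR so a new stat without a name breaks the build instead of producing a NULL QDict key at run time. The same idea in plain C11, with _Static_assert standing in for the glib macro:

#include <stdio.h>

enum stat_id {
    STAT_SWAP_IN,
    STAT_SWAP_OUT,
    STAT_FREE_MEMORY,
    STAT_NR               /* always last */
};

static const char *stat_names[] = {
    [STAT_SWAP_IN]     = "stat-swap-in",
    [STAT_SWAP_OUT]    = "stat-swap-out",
    [STAT_FREE_MEMORY] = "stat-free-memory",
};

#define N_ELEMENTS(a) (sizeof(a) / sizeof((a)[0]))

/* Adding an enum entry without a matching name fails compilation. */
_Static_assert(N_ELEMENTS(stat_names) == STAT_NR,
               "every stat must have a name");

int main(void)
{
    for (int i = 0; i < STAT_NR; i++) {
        printf("%s\n", stat_names[i]);
    }
    return 0;
}
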
virtio_balloon_set_status(VirtIODevice *vdev, uint8_t status) qemu_mutex_unlock(&s->free_page_lock); } } + return 0; +} + +static ResettableState *virtio_balloon_get_reset_state(Object *obj) +{ + VirtIOBalloon *s = VIRTIO_BALLOON(obj); + return &s->reset_state; +} + +static void virtio_balloon_reset_enter(Object *obj, ResetType type) +{ + VirtIOBalloon *s = VIRTIO_BALLOON(obj); + + /* + * When waking up from standby/suspend-to-ram, do not reset stats. + */ + if (type == RESET_TYPE_WAKEUP) { + return; + } + + reset_stats(s); + s->stats_last_update = 0; } static void virtio_balloon_instance_init(Object *obj) @@ -1001,7 +1040,7 @@ static const VMStateDescription vmstate_virtio_balloon = { }, }; -static Property virtio_balloon_properties[] = { +static const Property virtio_balloon_properties[] = { DEFINE_PROP_BIT("deflate-on-oom", VirtIOBalloon, host_features, VIRTIO_BALLOON_F_DEFLATE_ON_OOM, false), DEFINE_PROP_BIT("free-page-hint", VirtIOBalloon, host_features, @@ -1018,13 +1057,13 @@ static Property virtio_balloon_properties[] = { qemu_4_0_config_size, false), DEFINE_PROP_LINK("iothread", VirtIOBalloon, iothread, TYPE_IOTHREAD, IOThread *), - DEFINE_PROP_END_OF_LIST(), }; -static void virtio_balloon_class_init(ObjectClass *klass, void *data) +static void virtio_balloon_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); device_class_set_props(dc, virtio_balloon_properties); dc->vmsd = &vmstate_virtio_balloon; @@ -1037,6 +1076,9 @@ static void virtio_balloon_class_init(ObjectClass *klass, void *data) vdc->get_features = virtio_balloon_get_features; vdc->set_status = virtio_balloon_set_status; vdc->vmsd = &vmstate_virtio_balloon_device; + + rc->get_state = virtio_balloon_get_reset_state; + rc->phases.enter = virtio_balloon_reset_enter; } static const TypeInfo virtio_balloon_info = { diff --git a/hw/virtio/virtio-blk-pci.c b/hw/virtio/virtio-blk-pci.c index 9743bee..fd33bbd 100644 --- a/hw/virtio/virtio-blk-pci.c +++ b/hw/virtio/virtio-blk-pci.c @@ -38,13 +38,12 @@ struct VirtIOBlkPCI { VirtIOBlock vdev; }; -static Property virtio_blk_pci_properties[] = { +static const Property virtio_blk_pci_properties[] = { DEFINE_PROP_UINT32("class", VirtIOPCIProxy, class_code, 0), DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true), DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, DEV_NVECTORS_UNSPECIFIED), - DEFINE_PROP_END_OF_LIST(), }; static void virtio_blk_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) @@ -64,7 +63,7 @@ static void virtio_blk_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) qdev_realize(vdev, BUS(&vpci_dev->bus), errp); } -static void virtio_blk_pci_class_init(ObjectClass *klass, void *data) +static void virtio_blk_pci_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c index 896feb3..11adfbf 100644 --- a/hw/virtio/virtio-bus.c +++ b/hw/virtio/virtio-bus.c @@ -28,7 +28,7 @@ #include "qapi/error.h" #include "hw/virtio/virtio-bus.h" #include "hw/virtio/virtio.h" -#include "exec/address-spaces.h" +#include "system/address-spaces.h" /* #define DEBUG_VIRTIO_BUS */ @@ -348,7 +348,7 @@ bool virtio_bus_device_iommu_enabled(VirtIODevice *vdev) return klass->iommu_enabled(qbus->parent); } -static void virtio_bus_class_init(ObjectClass *klass, void *data) 
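
virtio_balloon_reset_enter() above distinguishes reset causes: a wakeup from suspend-to-RAM must keep the guest's statistics, while any other reset clears them. A toy version of that guard (enum values are stand-ins for QEMU's ResetType):

#include <stdio.h>

enum reset_type { RESET_COLD, RESET_WAKEUP };

struct balloon { unsigned long stats_last_update; };

static void reset_enter(struct balloon *s, enum reset_type type)
{
    if (type == RESET_WAKEUP) {
        return;                  /* keep stats across suspend/resume */
    }
    s->stats_last_update = 0;    /* full reset: discard stale stats */
}

int main(void)
{
    struct balloon s = { 42 };
    reset_enter(&s, RESET_WAKEUP);
    printf("after wakeup: %lu\n", s.stats_last_update);
    reset_enter(&s, RESET_COLD);
    printf("after cold reset: %lu\n", s.stats_last_update);
    return 0;
}
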
+static void virtio_bus_class_init(ObjectClass *klass, const void *data) { BusClass *bus_class = BUS_CLASS(klass); bus_class->get_dev_path = virtio_bus_get_dev_path; diff --git a/hw/virtio/virtio-crypto-pci.c b/hw/virtio/virtio-crypto-pci.c index 0783dc2..868abc0 100644 --- a/hw/virtio/virtio-crypto-pci.c +++ b/hw/virtio/virtio-crypto-pci.c @@ -37,11 +37,10 @@ struct VirtIOCryptoPCI { VirtIOCrypto vdev; }; -static Property virtio_crypto_pci_properties[] = { +static const Property virtio_crypto_pci_properties[] = { DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true), DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2), - DEFINE_PROP_END_OF_LIST(), }; static void virtio_crypto_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) @@ -60,7 +59,7 @@ static void virtio_crypto_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) } } -static void virtio_crypto_pci_class_init(ObjectClass *klass, void *data) +static void virtio_crypto_pci_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c index bbe8aa4..517f208 100644 --- a/hw/virtio/virtio-crypto.c +++ b/hw/virtio/virtio-crypto.c @@ -22,7 +22,7 @@ #include "hw/virtio/virtio-crypto.h" #include "hw/qdev-properties.h" #include "standard-headers/linux/virtio_ids.h" -#include "sysemu/cryptodev-vhost.h" +#include "system/cryptodev-vhost.h" #define VIRTIO_CRYPTO_VM_VERSION 1 @@ -205,6 +205,7 @@ virtio_crypto_create_asym_session(VirtIOCrypto *vcrypto, int queue_index; uint32_t algo, keytype, keylen; + sreq->info.op_code = opcode; algo = ldl_le_p(&sess_req->para.algo); keytype = ldl_le_p(&sess_req->para.keytype); keylen = ldl_le_p(&sess_req->para.keylen); @@ -224,7 +225,6 @@ virtio_crypto_create_asym_session(VirtIOCrypto *vcrypto, iov_discard_front(&iov, &out_num, keylen); } - sreq->info.op_code = opcode; asym_info = &sreq->info.u.asym_sess_info; asym_info->algo = algo; asym_info->keytype = keytype; @@ -461,7 +461,7 @@ static void virtio_crypto_init_request(VirtIOCrypto *vcrypto, VirtQueue *vq, req->in_iov = NULL; req->in_num = 0; req->in_len = 0; - req->flags = QCRYPTODEV_BACKEND_ALG__MAX; + req->flags = QCRYPTODEV_BACKEND_ALGO_TYPE__MAX; memset(&req->op_info, 0x00, sizeof(req->op_info)); } @@ -471,7 +471,7 @@ static void virtio_crypto_free_request(VirtIOCryptoReq *req) return; } - if (req->flags == QCRYPTODEV_BACKEND_ALG_SYM) { + if (req->flags == QCRYPTODEV_BACKEND_ALGO_TYPE_SYM) { size_t max_len; CryptoDevBackendSymOpInfo *op_info = req->op_info.u.sym_op_info; @@ -486,7 +486,7 @@ static void virtio_crypto_free_request(VirtIOCryptoReq *req) memset(op_info, 0, sizeof(*op_info) + max_len); g_free(op_info); } - } else if (req->flags == QCRYPTODEV_BACKEND_ALG_ASYM) { + } else if (req->flags == QCRYPTODEV_BACKEND_ALGO_TYPE_ASYM) { CryptoDevBackendAsymOpInfo *op_info = req->op_info.u.asym_op_info; if (op_info) { g_free(op_info->src); @@ -571,10 +571,10 @@ static void virtio_crypto_req_complete(void *opaque, int ret) VirtIODevice *vdev = VIRTIO_DEVICE(vcrypto); uint8_t status = -ret; - if (req->flags == QCRYPTODEV_BACKEND_ALG_SYM) { + if (req->flags == QCRYPTODEV_BACKEND_ALGO_TYPE_SYM) { virtio_crypto_sym_input_data_helper(vdev, req, status, req->op_info.u.sym_op_info); - } else if (req->flags == QCRYPTODEV_BACKEND_ALG_ASYM) { + } else if (req->flags == QCRYPTODEV_BACKEND_ALGO_TYPE_ASYM) { virtio_crypto_akcipher_input_data_helper(vdev, req, status, 
req->op_info.u.asym_op_info); } @@ -884,7 +884,7 @@ virtio_crypto_handle_request(VirtIOCryptoReq *request) switch (opcode) { case VIRTIO_CRYPTO_CIPHER_ENCRYPT: case VIRTIO_CRYPTO_CIPHER_DECRYPT: - op_info->algtype = request->flags = QCRYPTODEV_BACKEND_ALG_SYM; + op_info->algtype = request->flags = QCRYPTODEV_BACKEND_ALGO_TYPE_SYM; ret = virtio_crypto_handle_sym_req(vcrypto, &req.u.sym_req, op_info, out_iov, out_num); @@ -894,7 +894,7 @@ virtio_crypto_handle_request(VirtIOCryptoReq *request) case VIRTIO_CRYPTO_AKCIPHER_DECRYPT: case VIRTIO_CRYPTO_AKCIPHER_SIGN: case VIRTIO_CRYPTO_AKCIPHER_VERIFY: - op_info->algtype = request->flags = QCRYPTODEV_BACKEND_ALG_ASYM; + op_info->algtype = request->flags = QCRYPTODEV_BACKEND_ALGO_TYPE_ASYM; ret = virtio_crypto_handle_asym_req(vcrypto, &req.u.akcipher_req, op_info, out_iov, out_num); @@ -1008,19 +1008,19 @@ static uint32_t virtio_crypto_init_services(uint32_t qservices) { uint32_t vservices = 0; - if (qservices & (1 << QCRYPTODEV_BACKEND_SERVICE_CIPHER)) { + if (qservices & (1 << QCRYPTODEV_BACKEND_SERVICE_TYPE_CIPHER)) { vservices |= (1 << VIRTIO_CRYPTO_SERVICE_CIPHER); } - if (qservices & (1 << QCRYPTODEV_BACKEND_SERVICE_HASH)) { + if (qservices & (1 << QCRYPTODEV_BACKEND_SERVICE_TYPE_HASH)) { vservices |= (1 << VIRTIO_CRYPTO_SERVICE_HASH); } - if (qservices & (1 << QCRYPTODEV_BACKEND_SERVICE_MAC)) { + if (qservices & (1 << QCRYPTODEV_BACKEND_SERVICE_TYPE_MAC)) { vservices |= (1 << VIRTIO_CRYPTO_SERVICE_MAC); } - if (qservices & (1 << QCRYPTODEV_BACKEND_SERVICE_AEAD)) { + if (qservices & (1 << QCRYPTODEV_BACKEND_SERVICE_TYPE_AEAD)) { vservices |= (1 << VIRTIO_CRYPTO_SERVICE_AEAD); } - if (qservices & (1 << QCRYPTODEV_BACKEND_SERVICE_AKCIPHER)) { + if (qservices & (1 << QCRYPTODEV_BACKEND_SERVICE_TYPE_AKCIPHER)) { vservices |= (1 << VIRTIO_CRYPTO_SERVICE_AKCIPHER); } @@ -1128,10 +1128,9 @@ static const VMStateDescription vmstate_virtio_crypto = { }, }; -static Property virtio_crypto_properties[] = { +static const Property virtio_crypto_properties[] = { DEFINE_PROP_LINK("cryptodev", VirtIOCrypto, conf.cryptodev, TYPE_CRYPTODEV_BACKEND, CryptoDevBackend *), - DEFINE_PROP_END_OF_LIST(), }; static void virtio_crypto_get_config(VirtIODevice *vdev, uint8_t *config) @@ -1198,11 +1197,12 @@ static void virtio_crypto_vhost_status(VirtIOCrypto *c, uint8_t status) } } -static void virtio_crypto_set_status(VirtIODevice *vdev, uint8_t status) +static int virtio_crypto_set_status(VirtIODevice *vdev, uint8_t status) { VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev); virtio_crypto_vhost_status(vcrypto, status); + return 0; } static void virtio_crypto_guest_notifier_mask(VirtIODevice *vdev, int idx, @@ -1247,13 +1247,25 @@ static bool virtio_crypto_guest_notifier_pending(VirtIODevice *vdev, int idx) static struct vhost_dev *virtio_crypto_get_vhost(VirtIODevice *vdev) { VirtIOCrypto *vcrypto = VIRTIO_CRYPTO(vdev); - CryptoDevBackend *b = vcrypto->cryptodev; - CryptoDevBackendClient *cc = b->conf.peers.ccs[0]; - CryptoDevBackendVhost *vhost_crypto = cryptodev_get_vhost(cc, b, 0); + CryptoDevBackend *b; + CryptoDevBackendClient *cc; + CryptoDevBackendVhost *vhost_crypto; + + b = vcrypto->cryptodev; + if (!b) { + return NULL; + } + + cc = b->conf.peers.ccs[0]; + vhost_crypto = cryptodev_get_vhost(cc, b, 0); + if (!vhost_crypto) { + return NULL; + } + return &vhost_crypto->dev; } -static void virtio_crypto_class_init(ObjectClass *klass, void *data) +static void virtio_crypto_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); 
VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); diff --git a/hw/virtio/virtio-hmp-cmds.c b/hw/virtio/virtio-hmp-cmds.c index 477c97d..7d8677b 100644 --- a/hw/virtio/virtio-hmp-cmds.c +++ b/hw/virtio/virtio-hmp-cmds.c @@ -9,7 +9,7 @@ #include "monitor/hmp.h" #include "monitor/monitor.h" #include "qapi/qapi-commands-virtio.h" -#include "qapi/qmp/qdict.h" +#include "qobject/qdict.h" static void hmp_virtio_dump_protocols(Monitor *mon, diff --git a/hw/virtio/virtio-input-pci.c b/hw/virtio/virtio-input-pci.c index a53edf4..3be5358 100644 --- a/hw/virtio/virtio-input-pci.c +++ b/hw/virtio/virtio-input-pci.c @@ -37,9 +37,8 @@ struct VirtIOInputHIDPCI { VirtIOInputHID vdev; }; -static Property virtio_input_pci_properties[] = { +static const Property virtio_input_pci_properties[] = { DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2), - DEFINE_PROP_END_OF_LIST(), }; static void virtio_input_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) @@ -51,7 +50,7 @@ static void virtio_input_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) qdev_realize(vdev, BUS(&vpci_dev->bus), errp); } -static void virtio_input_pci_class_init(ObjectClass *klass, void *data) +static void virtio_input_pci_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); @@ -64,7 +63,8 @@ static void virtio_input_pci_class_init(ObjectClass *klass, void *data) pcidev_k->class_id = PCI_CLASS_INPUT_OTHER; } -static void virtio_input_hid_kbd_pci_class_init(ObjectClass *klass, void *data) +static void virtio_input_hid_kbd_pci_class_init(ObjectClass *klass, + const void *data) { PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass); @@ -72,7 +72,7 @@ static void virtio_input_hid_kbd_pci_class_init(ObjectClass *klass, void *data) } static void virtio_input_hid_mouse_pci_class_init(ObjectClass *klass, - void *data) + const void *data) { PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass); diff --git a/hw/virtio/virtio-iommu-pci.c b/hw/virtio/virtio-iommu-pci.c index cbdfe4c..8123c6f 100644 --- a/hw/virtio/virtio-iommu-pci.c +++ b/hw/virtio/virtio-iommu-pci.c @@ -34,12 +34,11 @@ struct VirtIOIOMMUPCI { VirtIOIOMMU vdev; }; -static Property virtio_iommu_pci_properties[] = { +static const Property virtio_iommu_pci_properties[] = { DEFINE_PROP_UINT32("class", VirtIOPCIProxy, class_code, 0), DEFINE_PROP_ARRAY("reserved-regions", VirtIOIOMMUPCI, vdev.nr_prop_resv_regions, vdev.prop_resv_regions, qdev_prop_reserved_region, ReservedRegion), - DEFINE_PROP_END_OF_LIST(), }; static void virtio_iommu_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) @@ -74,7 +73,7 @@ static void virtio_iommu_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) qdev_realize(vdev, BUS(&vpci_dev->bus), errp); } -static void virtio_iommu_pci_class_init(ObjectClass *klass, void *data) +static void virtio_iommu_pci_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c index 1326c6e..3500f1b 100644 --- a/hw/virtio/virtio-iommu.c +++ b/hw/virtio/virtio-iommu.c @@ -25,9 +25,9 @@ #include "exec/target_page.h" #include "hw/qdev-properties.h" #include "hw/virtio/virtio.h" -#include "sysemu/kvm.h" -#include "sysemu/reset.h" -#include "sysemu/sysemu.h" +#include "system/kvm.h" +#include "system/reset.h" +#include "system/system.h" #include "qemu/reserved-region.h" #include "qemu/units.h" #include "qapi/error.h" @@ -69,6 +69,11 @@ typedef struct 
VirtIOIOMMUMapping { uint32_t flags; } VirtIOIOMMUMapping; +struct hiod_key { + PCIBus *bus; + uint8_t devfn; +}; + static inline uint16_t virtio_iommu_get_bdf(IOMMUDevice *dev) { return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn); @@ -303,6 +308,7 @@ static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep) if (!ep->domain) { return; } + trace_virtio_iommu_detach_endpoint_from_domain(domain->id, ep->id); g_tree_foreach(domain->mappings, virtio_iommu_notify_unmap_cb, ep->iommu_mr); QLIST_REMOVE(ep, next); @@ -462,8 +468,245 @@ static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque, return &sdev->as; } +static gboolean hiod_equal(gconstpointer v1, gconstpointer v2) +{ + const struct hiod_key *key1 = v1; + const struct hiod_key *key2 = v2; + + return (key1->bus == key2->bus) && (key1->devfn == key2->devfn); +} + +static guint hiod_hash(gconstpointer v) +{ + const struct hiod_key *key = v; + guint value = (guint)(uintptr_t)key->bus; + + return (guint)(value << 8 | key->devfn); +} + +static void hiod_destroy(gpointer v) +{ + object_unref(v); +} + +static HostIOMMUDevice * +get_host_iommu_device(VirtIOIOMMU *viommu, PCIBus *bus, int devfn) { + struct hiod_key key = { + .bus = bus, + .devfn = devfn, + }; + + return g_hash_table_lookup(viommu->host_iommu_devices, &key); +} + +/** + * rebuild_resv_regions: rebuild resv regions with both the + * info of host resv ranges and property set resv ranges + */ +static int rebuild_resv_regions(IOMMUDevice *sdev) +{ + GList *l; + int i = 0; + + /* free the existing list and rebuild it from scratch */ + g_list_free_full(sdev->resv_regions, g_free); + sdev->resv_regions = NULL; + + /* First add host reserved regions if any, all tagged as RESERVED */ + for (l = sdev->host_resv_ranges; l; l = l->next) { + ReservedRegion *reg = g_new0(ReservedRegion, 1); + Range *r = (Range *)l->data; + + reg->type = VIRTIO_IOMMU_RESV_MEM_T_RESERVED; + range_set_bounds(®->range, range_lob(r), range_upb(r)); + sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, reg); + trace_virtio_iommu_host_resv_regions(sdev->iommu_mr.parent_obj.name, i, + range_lob(®->range), + range_upb(®->range)); + i++; + } + /* + * then add higher priority reserved regions set by the machine + * through properties + */ + add_prop_resv_regions(sdev); + return 0; +} + +static int virtio_iommu_set_host_iova_ranges(VirtIOIOMMU *s, PCIBus *bus, + int devfn, GList *iova_ranges, + Error **errp) +{ + IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus); + IOMMUDevice *sdev; + int ret = -EINVAL; + + if (!sbus) { + error_setg(errp, "%s: no IOMMUPciBus found!", __func__); + return ret; + } + + sdev = sbus->pbdev[devfn]; + if (!sdev) { + error_setg(errp, "%s: no IOMMUDevice found!", __func__); + return ret; + } + + if (sdev->host_resv_ranges) { + error_setg(errp, "%s virtio-iommu does not support aliased BDF", + __func__); + return ret; + } + + range_inverse_array(iova_ranges, + &sdev->host_resv_ranges, + 0, UINT64_MAX); + rebuild_resv_regions(sdev); + + return 0; +} + +static void virtio_iommu_unset_host_iova_ranges(VirtIOIOMMU *s, PCIBus *bus, + int devfn) +{ + IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus); + IOMMUDevice *sdev; + + if (!sbus) { + return; + } + + sdev = sbus->pbdev[devfn]; + if (!sdev) { + return; + } + + g_list_free_full(g_steal_pointer(&sdev->host_resv_ranges), g_free); + g_list_free_full(sdev->resv_regions, g_free); + sdev->host_resv_ranges = NULL; + sdev->resv_regions = NULL; + add_prop_resv_regions(sdev); +} + + 
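
The hiod_hash()/hiod_equal() pair above keys a GHashTable on a (bus, devfn) struct rather than a scalar. A compilable example of the same custom-key pattern; the table owns its heap-allocated keys while lookups may use a stack key (build with `pkg-config --cflags --libs glib-2.0`):

#include <glib.h>
#include <stdio.h>

struct key { void *bus; guint8 devfn; };

static guint key_hash(gconstpointer v)
{
    const struct key *k = v;
    /* mixing the pointer with devfn is good enough for a hash */
    return (guint)(((guintptr)k->bus << 8) | k->devfn);
}

static gboolean key_equal(gconstpointer a, gconstpointer b)
{
    const struct key *k1 = a, *k2 = b;
    return k1->bus == k2->bus && k1->devfn == k2->devfn;
}

int main(void)
{
    GHashTable *t = g_hash_table_new_full(key_hash, key_equal,
                                          g_free, NULL);
    int bus;                     /* any stable pointer works as "bus" */
    struct key *k = g_new0(struct key, 1);

    k->bus = &bus;
    k->devfn = 3;
    g_hash_table_insert(t, k, (gpointer)"device");

    struct key probe = { &bus, 3 };  /* stack key is fine for lookups */
    printf("%s\n", (char *)g_hash_table_lookup(t, &probe));
    g_hash_table_destroy(t);
    return 0;
}
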
+static bool check_page_size_mask(VirtIOIOMMU *viommu, uint64_t new_mask, + Error **errp) +{ + uint64_t cur_mask = viommu->config.page_size_mask; + + if ((cur_mask & new_mask) == 0) { + error_setg(errp, "virtio-iommu reports a page size mask 0x%"PRIx64 + " incompatible with currently supported mask 0x%"PRIx64, + new_mask, cur_mask); + return false; + } + /* + * Once the granule is frozen we can't change the mask anymore. If by + * chance the hotplugged device supports the same granule, we can still + * accept it. + */ + if (viommu->granule_frozen) { + int cur_granule = ctz64(cur_mask); + + if (!(BIT_ULL(cur_granule) & new_mask)) { + error_setg(errp, + "virtio-iommu does not support frozen granule 0x%llx", + BIT_ULL(cur_granule)); + return false; + } + } + return true; +} + +static bool virtio_iommu_set_iommu_device(PCIBus *bus, void *opaque, int devfn, + HostIOMMUDevice *hiod, Error **errp) +{ + ERRP_GUARD(); + VirtIOIOMMU *viommu = opaque; + HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod); + struct hiod_key *new_key; + GList *host_iova_ranges = NULL; + + assert(hiod); + + if (get_host_iommu_device(viommu, bus, devfn)) { + error_setg(errp, "Host IOMMU device already exists"); + return false; + } + + if (hiodc->get_iova_ranges) { + int ret; + host_iova_ranges = hiodc->get_iova_ranges(hiod); + if (!host_iova_ranges) { + return true; /* some old kernels may not support that capability */ + } + ret = virtio_iommu_set_host_iova_ranges(viommu, hiod->aliased_bus, + hiod->aliased_devfn, + host_iova_ranges, errp); + if (ret) { + goto error; + } + } + if (hiodc->get_page_size_mask) { + uint64_t new_mask = hiodc->get_page_size_mask(hiod); + + if (check_page_size_mask(viommu, new_mask, errp)) { + /* + * The default mask depends on the "granule" property. For example, + * with 4k granule, it is -(4 * KiB). When an assigned device has + * page size restrictions due to the hardware IOMMU configuration, + * apply this restriction to the mask. 
+ */
+ trace_virtio_iommu_update_page_size_mask(hiod->name,
+ viommu->config.page_size_mask,
+ new_mask);
+ if (!viommu->granule_frozen) {
+ viommu->config.page_size_mask &= new_mask;
+ }
+ } else {
+ error_prepend(errp, "%s: ", hiod->name);
+ goto error;
+ }
+ }
+
+ new_key = g_malloc(sizeof(*new_key));
+ new_key->bus = bus;
+ new_key->devfn = devfn;
+
+ object_ref(hiod);
+ g_hash_table_insert(viommu->host_iommu_devices, new_key, hiod);
+ g_list_free_full(host_iova_ranges, g_free);
+
+ return true;
+error:
+ g_list_free_full(host_iova_ranges, g_free);
+ return false;
+}
+
+static void
+virtio_iommu_unset_iommu_device(PCIBus *bus, void *opaque, int devfn)
+{
+ VirtIOIOMMU *viommu = opaque;
+ HostIOMMUDevice *hiod;
+ struct hiod_key key = {
+ .bus = bus,
+ .devfn = devfn,
+ };
+
+ hiod = g_hash_table_lookup(viommu->host_iommu_devices, &key);
+ if (!hiod) {
+ return;
+ }
+ virtio_iommu_unset_host_iova_ranges(viommu, hiod->aliased_bus,
+ hiod->aliased_devfn);
+
+ g_hash_table_remove(viommu->host_iommu_devices, &key);
+}
+
 static const PCIIOMMUOps virtio_iommu_ops = {
 .get_address_space = virtio_iommu_find_add_as,
+ .set_iommu_device = virtio_iommu_set_iommu_device,
+ .unset_iommu_device = virtio_iommu_unset_iommu_device,
 };

 static int virtio_iommu_attach(VirtIOIOMMU *s,
@@ -544,6 +787,7 @@ static int virtio_iommu_detach(VirtIOIOMMU *s,
 if (QLIST_EMPTY(&domain->endpoint_list)) {
 g_tree_remove(s->domains, GUINT_TO_POINTER(domain->id));
 }
+ g_tree_remove(s->endpoints, GUINT_TO_POINTER(ep_id));
 return VIRTIO_IOMMU_S_OK;
 }
@@ -706,7 +950,6 @@ static int virtio_iommu_probe(VirtIOIOMMU *s,
 }
 buf += count;
 free -= count;
- sdev->probe_done = true;
 return VIRTIO_IOMMU_S_OK;
 }
@@ -782,6 +1025,9 @@ static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)
 iov = elem->out_sg;
 sz = iov_to_buf(iov, iov_cnt, 0, &head, sizeof(head));
 if (unlikely(sz != sizeof(head))) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: read %zu bytes from command head "
+ "but expected %zu\n", __func__, sz, sizeof(head));
 tail.status = VIRTIO_IOMMU_S_DEVERR;
 goto out;
 }
@@ -818,6 +1064,25 @@ static void virtio_iommu_handle_command(VirtIODevice *vdev, VirtQueue *vq)
 out:
 sz = iov_from_buf(elem->in_sg, elem->in_num, 0, buf ? buf : &tail,
 output_size);
+ if (unlikely(sz != output_size)) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: wrote %zu bytes to command response "
+ "but response size is %zu\n",
+ __func__, sz, output_size);
+ tail.status = VIRTIO_IOMMU_S_DEVERR;
+ /*
+ * We checked that sizeof(tail) can fit to elem->in_sg at the
+ * beginning of the loop
+ */
+ output_size = sizeof(tail);
+ g_free(buf);
+ buf = NULL;
+ sz = iov_from_buf(elem->in_sg,
+ elem->in_num,
+ 0,
+ &tail,
+ output_size);
+ }
 assert(sz == output_size);
 virtqueue_push(vq, elem, sz);
@@ -1115,150 +1380,6 @@ static int virtio_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu_mr,
 return 0;
 }
-/*
- * The default mask depends on the "granule" property. For example, with
- * 4k granule, it is -(4 * KiB). When an assigned device has page size
- * restrictions due to the hardware IOMMU configuration, apply this restriction
- * to the mask.
- */ -static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr, - uint64_t new_mask, - Error **errp) -{ - IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr); - VirtIOIOMMU *s = sdev->viommu; - uint64_t cur_mask = s->config.page_size_mask; - - trace_virtio_iommu_set_page_size_mask(mr->parent_obj.name, cur_mask, - new_mask); - - if ((cur_mask & new_mask) == 0) { - error_setg(errp, "virtio-iommu %s reports a page size mask 0x%"PRIx64 - " incompatible with currently supported mask 0x%"PRIx64, - mr->parent_obj.name, new_mask, cur_mask); - return -1; - } - - /* - * Once the granule is frozen we can't change the mask anymore. If by - * chance the hotplugged device supports the same granule, we can still - * accept it. - */ - if (s->granule_frozen) { - int cur_granule = ctz64(cur_mask); - - if (!(BIT_ULL(cur_granule) & new_mask)) { - error_setg(errp, "virtio-iommu %s does not support frozen granule 0x%llx", - mr->parent_obj.name, BIT_ULL(cur_granule)); - return -1; - } - return 0; - } - - s->config.page_size_mask &= new_mask; - return 0; -} - -/** - * rebuild_resv_regions: rebuild resv regions with both the - * info of host resv ranges and property set resv ranges - */ -static int rebuild_resv_regions(IOMMUDevice *sdev) -{ - GList *l; - int i = 0; - - /* free the existing list and rebuild it from scratch */ - g_list_free_full(sdev->resv_regions, g_free); - sdev->resv_regions = NULL; - - /* First add host reserved regions if any, all tagged as RESERVED */ - for (l = sdev->host_resv_ranges; l; l = l->next) { - ReservedRegion *reg = g_new0(ReservedRegion, 1); - Range *r = (Range *)l->data; - - reg->type = VIRTIO_IOMMU_RESV_MEM_T_RESERVED; - range_set_bounds(®->range, range_lob(r), range_upb(r)); - sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, reg); - trace_virtio_iommu_host_resv_regions(sdev->iommu_mr.parent_obj.name, i, - range_lob(®->range), - range_upb(®->range)); - i++; - } - /* - * then add higher priority reserved regions set by the machine - * through properties - */ - add_prop_resv_regions(sdev); - return 0; -} - -/** - * virtio_iommu_set_iova_ranges: Conveys the usable IOVA ranges - * - * The function turns those into reserved ranges. Once some - * reserved ranges have been set, new reserved regions cannot be - * added outside of the original ones. 
- * - * @mr: IOMMU MR - * @iova_ranges: list of usable IOVA ranges - * @errp: error handle - */ -static int virtio_iommu_set_iova_ranges(IOMMUMemoryRegion *mr, - GList *iova_ranges, - Error **errp) -{ - IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr); - GList *current_ranges = sdev->host_resv_ranges; - GList *l, *tmp, *new_ranges = NULL; - int ret = -EINVAL; - - /* check that each new resv region is included in an existing one */ - if (sdev->host_resv_ranges) { - range_inverse_array(iova_ranges, - &new_ranges, - 0, UINT64_MAX); - - for (tmp = new_ranges; tmp; tmp = tmp->next) { - Range *newr = (Range *)tmp->data; - bool included = false; - - for (l = current_ranges; l; l = l->next) { - Range * r = (Range *)l->data; - - if (range_contains_range(r, newr)) { - included = true; - break; - } - } - if (!included) { - goto error; - } - } - /* all new reserved ranges are included in existing ones */ - ret = 0; - goto out; - } - - if (sdev->probe_done) { - warn_report("%s: Notified about new host reserved regions after probe", - mr->parent_obj.name); - } - - range_inverse_array(iova_ranges, - &sdev->host_resv_ranges, - 0, UINT64_MAX); - rebuild_resv_regions(sdev); - - return 0; -error: - error_setg(errp, "IOMMU mr=%s Conflicting host reserved ranges set!", - mr->parent_obj.name); -out: - g_list_free_full(new_ranges, g_free); - return ret; -} - static void virtio_iommu_system_reset(void *opaque) { VirtIOIOMMU *s = opaque; @@ -1281,18 +1402,6 @@ static void virtio_iommu_freeze_granule(Notifier *notifier, void *data) VirtIOIOMMU *s = container_of(notifier, VirtIOIOMMU, machine_done); int granule; - if (likely(s->config.bypass)) { - /* - * Transient IOMMU MR enable to collect page_size_mask requirements - * through memory_region_iommu_set_page_size_mask() called by - * VFIO region_add() callback - */ - s->config.bypass = false; - virtio_iommu_switch_address_space_all(s); - /* restore default */ - s->config.bypass = true; - virtio_iommu_switch_address_space_all(s); - } s->granule_frozen = true; granule = ctz64(s->config.page_size_mask); trace_virtio_iommu_freeze_granule(BIT_ULL(granule)); @@ -1357,6 +1466,9 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) s->as_by_busptr = g_hash_table_new_full(NULL, NULL, NULL, g_free); + s->host_iommu_devices = g_hash_table_new_full(hiod_hash, hiod_equal, + g_free, hiod_destroy); + if (s->primary_bus) { pci_setup_iommu(s->primary_bus, &virtio_iommu_ops, s); } else { @@ -1392,11 +1504,11 @@ static void virtio_iommu_device_unrealize(DeviceState *dev) virtio_cleanup(vdev); } -static void virtio_iommu_device_reset(VirtIODevice *vdev) +static void virtio_iommu_device_reset_exit(Object *obj, ResetType type) { - VirtIOIOMMU *s = VIRTIO_IOMMU(vdev); + VirtIOIOMMU *s = VIRTIO_IOMMU(obj); - trace_virtio_iommu_device_reset(); + trace_virtio_iommu_device_reset_exit(); if (s->domains) { g_tree_destroy(s->domains); @@ -1410,9 +1522,10 @@ static void virtio_iommu_device_reset(VirtIODevice *vdev) NULL, NULL, virtio_iommu_put_endpoint); } -static void virtio_iommu_set_status(VirtIODevice *vdev, uint8_t status) +static int virtio_iommu_set_status(VirtIODevice *vdev, uint8_t status) { trace_virtio_iommu_device_status(status); + return 0; } static void virtio_iommu_instance_init(Object *obj) @@ -1543,20 +1656,20 @@ static const VMStateDescription vmstate_virtio_iommu = { }, }; -static Property virtio_iommu_properties[] = { +static const Property virtio_iommu_properties[] = { DEFINE_PROP_LINK("primary-bus", VirtIOIOMMU, primary_bus, TYPE_PCI_BUS, 
PCIBus *), DEFINE_PROP_BOOL("boot-bypass", VirtIOIOMMU, boot_bypass, true), DEFINE_PROP_GRANULE_MODE("granule", VirtIOIOMMU, granule_mode, GRANULE_MODE_HOST), DEFINE_PROP_UINT8("aw-bits", VirtIOIOMMU, aw_bits, 64), - DEFINE_PROP_END_OF_LIST(), }; -static void virtio_iommu_class_init(ObjectClass *klass, void *data) +static void virtio_iommu_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + ResettableClass *rc = RESETTABLE_CLASS(klass); device_class_set_props(dc, virtio_iommu_properties); dc->vmsd = &vmstate_virtio_iommu; @@ -1564,7 +1677,12 @@ static void virtio_iommu_class_init(ObjectClass *klass, void *data) set_bit(DEVICE_CATEGORY_MISC, dc->categories); vdc->realize = virtio_iommu_device_realize; vdc->unrealize = virtio_iommu_device_unrealize; - vdc->reset = virtio_iommu_device_reset; + + /* + * Use 'exit' reset phase to make sure all DMA requests + * have been quiesced during 'enter' or 'hold' phase + */ + rc->phases.exit = virtio_iommu_device_reset_exit; vdc->get_config = virtio_iommu_get_config; vdc->set_config = virtio_iommu_set_config; vdc->get_features = virtio_iommu_get_features; @@ -1573,15 +1691,13 @@ static void virtio_iommu_class_init(ObjectClass *klass, void *data) } static void virtio_iommu_memory_region_class_init(ObjectClass *klass, - void *data) + const void *data) { IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass); imrc->translate = virtio_iommu_translate; imrc->replay = virtio_iommu_replay; imrc->notify_flag_changed = virtio_iommu_notify_flag_changed; - imrc->iommu_set_page_size_mask = virtio_iommu_set_page_size_mask; - imrc->iommu_set_iova_ranges = virtio_iommu_set_iova_ranges; } static const TypeInfo virtio_iommu_info = { diff --git a/hw/virtio/virtio-md-pci.c b/hw/virtio/virtio-md-pci.c index 9ec5067..9278b32 100644 --- a/hw/virtio/virtio-md-pci.c +++ b/hw/virtio/virtio-md-pci.c @@ -138,7 +138,7 @@ static const TypeInfo virtio_md_pci_info = { .instance_size = sizeof(VirtIOMDPCI), .class_size = sizeof(VirtIOMDPCIClass), .abstract = true, - .interfaces = (InterfaceInfo[]) { + .interfaces = (const InterfaceInfo[]) { { TYPE_MEMORY_DEVICE }, { } }, diff --git a/hw/virtio/virtio-mem-pci.c b/hw/virtio/virtio-mem-pci.c index 1b4e9a3..f592eb1 100644 --- a/hw/virtio/virtio-mem-pci.c +++ b/hw/virtio/virtio-mem-pci.c @@ -22,6 +22,10 @@ static void virtio_mem_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) VirtIOMEMPCI *mem_pci = VIRTIO_MEM_PCI(vpci_dev); DeviceState *vdev = DEVICE(&mem_pci->vdev); + if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) { + vpci_dev->nvectors = 2; + } + virtio_pci_force_virtio_1(vpci_dev); qdev_realize(vdev, BUS(&vpci_dev->bus), errp); } @@ -152,7 +156,14 @@ static void virtio_mem_pci_set_requested_size(Object *obj, Visitor *v, object_property_set(OBJECT(&pci_mem->vdev), name, v, errp); } -static void virtio_mem_pci_class_init(ObjectClass *klass, void *data) +static const Property virtio_mem_pci_class_properties[] = { + DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags, + VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true), + DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, + DEV_NVECTORS_UNSPECIFIED), +}; + +static void virtio_mem_pci_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); @@ -164,6 +175,7 @@ static void virtio_mem_pci_class_init(ObjectClass *klass, void *data) set_bit(DEVICE_CATEGORY_MISC, dc->categories); pcidev_k->revision = 
VIRTIO_PCI_ABI_VERSION; pcidev_k->class_id = PCI_CLASS_OTHERS; + device_class_set_props(dc, virtio_mem_pci_class_properties); mdc->get_addr = virtio_mem_pci_get_addr; mdc->set_addr = virtio_mem_pci_set_addr; diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c index ffd119e..c46f6f9 100644 --- a/hw/virtio/virtio-mem.c +++ b/hw/virtio/virtio-mem.c @@ -15,20 +15,20 @@ #include "qemu/cutils.h" #include "qemu/error-report.h" #include "qemu/units.h" -#include "sysemu/numa.h" -#include "sysemu/sysemu.h" -#include "sysemu/reset.h" -#include "sysemu/runstate.h" +#include "system/numa.h" +#include "system/system.h" +#include "system/reset.h" +#include "system/runstate.h" #include "hw/virtio/virtio.h" #include "hw/virtio/virtio-bus.h" #include "hw/virtio/virtio-mem.h" #include "qapi/error.h" #include "qapi/visitor.h" -#include "exec/ram_addr.h" +#include "system/ram_addr.h" #include "migration/misc.h" #include "hw/boards.h" #include "hw/qdev-properties.h" -#include CONFIG_DEVICES +#include "hw/acpi/acpi.h" #include "trace.h" static const VMStateDescription vmstate_virtio_mem_device_early; @@ -61,6 +61,8 @@ static uint32_t virtio_mem_default_thp_size(void) } else if (qemu_real_host_page_size() == 64 * KiB) { default_thp_size = 512 * MiB; } +#elif defined(__s390x__) + default_thp_size = 1 * MiB; #endif return default_thp_size; @@ -88,6 +90,7 @@ static uint32_t virtio_mem_default_thp_size(void) static uint32_t thp_size; #define HPAGE_PMD_SIZE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size" +#define HPAGE_PATH "/sys/kernel/mm/transparent_hugepage/" static uint32_t virtio_mem_thp_size(void) { gchar *content = NULL; @@ -98,6 +101,12 @@ static uint32_t virtio_mem_thp_size(void) return thp_size; } + /* No THP -> no restrictions. */ + if (!g_file_test(HPAGE_PATH, G_FILE_TEST_EXISTS)) { + thp_size = VIRTIO_MEM_MIN_BLOCK_SIZE; + return thp_size; + } + /* * Try to probe the actual THP size, fallback to (sane but eventually * incorrect) default sizes. @@ -161,7 +170,7 @@ static bool virtio_mem_has_shared_zeropage(RAMBlock *rb) * necessary (as the section size can change). But it's more likely that the * section size will rather get smaller and not bigger over time. */ -#if defined(TARGET_X86_64) || defined(TARGET_I386) +#if defined(TARGET_X86_64) || defined(TARGET_I386) || defined(TARGET_S390X) #define VIRTIO_MEM_USABLE_EXTENT (2 * (128 * MiB)) #elif defined(TARGET_ARM) #define VIRTIO_MEM_USABLE_EXTENT (2 * (512 * MiB)) @@ -181,7 +190,7 @@ static bool virtio_mem_is_busy(void) * after plugging them) until we're running on the destination (as we didn't * migrate these blocks when they were unplugged). */ - return migration_in_incoming_postcopy() || !migration_is_idle(); + return migration_in_incoming_postcopy() || migration_is_running(); } typedef int (*virtio_mem_range_cb)(VirtIOMEM *vmem, void *arg, @@ -235,28 +244,6 @@ static int virtio_mem_for_each_plugged_range(VirtIOMEM *vmem, void *arg, return ret; } -/* - * Adjust the memory section to cover the intersection with the given range. - * - * Returns false if the intersection is empty, otherwise returns true. 
- */ -static bool virtio_mem_intersect_memory_section(MemoryRegionSection *s, - uint64_t offset, uint64_t size) -{ - uint64_t start = MAX(s->offset_within_region, offset); - uint64_t end = MIN(s->offset_within_region + int128_get64(s->size), - offset + size); - - if (end <= start) { - return false; - } - - s->offset_within_address_space += start - s->offset_within_region; - s->offset_within_region = start; - s->size = int128_make64(end - start); - return true; -} - typedef int (*virtio_mem_section_cb)(MemoryRegionSection *s, void *arg); static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem, @@ -278,7 +265,7 @@ static int virtio_mem_for_each_plugged_section(const VirtIOMEM *vmem, first_bit + 1) - 1; size = (last_bit - first_bit + 1) * vmem->block_size; - if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { + if (!memory_region_section_intersect_range(&tmp, offset, size)) { break; } ret = cb(&tmp, arg); @@ -310,7 +297,7 @@ static int virtio_mem_for_each_unplugged_section(const VirtIOMEM *vmem, first_bit + 1) - 1; size = (last_bit - first_bit + 1) * vmem->block_size; - if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { + if (!memory_region_section_intersect_range(&tmp, offset, size)) { break; } ret = cb(&tmp, arg); @@ -346,7 +333,7 @@ static void virtio_mem_notify_unplug(VirtIOMEM *vmem, uint64_t offset, QLIST_FOREACH(rdl, &vmem->rdl_list, next) { MemoryRegionSection tmp = *rdl->section; - if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { + if (!memory_region_section_intersect_range(&tmp, offset, size)) { continue; } rdl->notify_discard(rdl, &tmp); @@ -362,7 +349,7 @@ static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset, QLIST_FOREACH(rdl, &vmem->rdl_list, next) { MemoryRegionSection tmp = *rdl->section; - if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { + if (!memory_region_section_intersect_range(&tmp, offset, size)) { continue; } ret = rdl->notify_populate(rdl, &tmp); @@ -379,7 +366,7 @@ static int virtio_mem_notify_plug(VirtIOMEM *vmem, uint64_t offset, if (rdl2 == rdl) { break; } - if (!virtio_mem_intersect_memory_section(&tmp, offset, size)) { + if (!memory_region_section_intersect_range(&tmp, offset, size)) { continue; } rdl2->notify_discard(rdl2, &tmp); @@ -874,15 +861,16 @@ static uint64_t virtio_mem_get_features(VirtIODevice *vdev, uint64_t features, MachineState *ms = MACHINE(qdev_get_machine()); VirtIOMEM *vmem = VIRTIO_MEM(vdev); - if (ms->numa_state) { -#if defined(CONFIG_ACPI) + if (ms->numa_state && acpi_builtin()) { virtio_add_feature(&features, VIRTIO_MEM_F_ACPI_PXM); -#endif } assert(vmem->unplugged_inaccessible != ON_OFF_AUTO_AUTO); if (vmem->unplugged_inaccessible == ON_OFF_AUTO_ON) { virtio_add_feature(&features, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE); } + if (qemu_wakeup_suspend_enabled()) { + virtio_add_feature(&features, VIRTIO_MEM_F_PERSISTENT_SUSPEND); + } return features; } @@ -895,18 +883,6 @@ static int virtio_mem_validate_features(VirtIODevice *vdev) return 0; } -static void virtio_mem_system_reset(void *opaque) -{ - VirtIOMEM *vmem = VIRTIO_MEM(opaque); - - /* - * During usual resets, we will unplug all memory and shrink the usable - * region size. This is, however, not possible in all scenarios. Then, - * the guest has to deal with this manually (VIRTIO_MEM_REQ_UNPLUG_ALL). 
- */ - virtio_mem_unplug_all(vmem); -} - static void virtio_mem_prepare_mr(VirtIOMEM *vmem) { const uint64_t region_size = memory_region_size(&vmem->memdev->mr); @@ -958,6 +934,7 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp) VirtIOMEM *vmem = VIRTIO_MEM(dev); uint64_t page_size; RAMBlock *rb; + Object *obj; int ret; if (!vmem->memdev) { @@ -990,7 +967,7 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp) return; } - if (enable_mlock) { + if (should_mlock(mlock_state)) { error_setg(errp, "Incompatible with mlock"); return; } @@ -1071,6 +1048,17 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp) } /* + * Set ourselves as RamDiscardManager before the plug handler maps the + * memory region and exposes it via an address space. + */ + if (memory_region_set_ram_discard_manager(&vmem->memdev->mr, + RAM_DISCARD_MANAGER(vmem))) { + error_setg(errp, "Failed to set RamDiscardManager"); + ram_block_coordinated_discard_require(false); + return; + } + + /* * We don't know at this point whether shared RAM is migrated using * QEMU or migrated using the file content. "x-ignore-shared" will be * configured after realizing the device. So in case we have an @@ -1084,6 +1072,7 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp) ret = ram_block_discard_range(rb, 0, qemu_ram_get_used_length(rb)); if (ret) { error_setg_errno(errp, -ret, "Unexpected error discarding RAM"); + memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL); ram_block_coordinated_discard_require(false); return; } @@ -1123,14 +1112,28 @@ static void virtio_mem_device_realize(DeviceState *dev, Error **errp) vmstate_register_any(VMSTATE_IF(vmem), &vmstate_virtio_mem_device_early, vmem); } - qemu_register_reset(virtio_mem_system_reset, vmem); /* - * Set ourselves as RamDiscardManager before the plug handler maps the - * memory region and exposes it via an address space. + * We only want to unplug all memory to start with a clean slate when + * it is safe for the guest -- during system resets that call + * qemu_devices_reset(). + * + * We'll filter out selected qemu_devices_reset() calls used for other + * purposes, like resetting all devices during wakeup from suspend on + * x86 based on the reset type passed to qemu_devices_reset(). + * + * Unplugging all memory during simple device resets can result in the VM + * unexpectedly losing RAM, corrupting VM state. + * + * Simple device resets (or resets triggered by getting a parent device + * reset) must not change the state of plugged memory blocks. Therefore, + * we need a dedicated reset object that only gets called during + * qemu_devices_reset(). */ - memory_region_set_ram_discard_manager(&vmem->memdev->mr, - RAM_DISCARD_MANAGER(vmem)); + obj = object_new(TYPE_VIRTIO_MEM_SYSTEM_RESET); + vmem->system_reset = VIRTIO_MEM_SYSTEM_RESET(obj); + vmem->system_reset->vmem = vmem; + qemu_register_resettable(obj); } static void virtio_mem_device_unrealize(DeviceState *dev) @@ -1138,12 +1141,9 @@ static void virtio_mem_device_unrealize(DeviceState *dev) VirtIODevice *vdev = VIRTIO_DEVICE(dev); VirtIOMEM *vmem = VIRTIO_MEM(dev); - /* - * The unplug handler unmapped the memory region, it cannot be - * found via an address space anymore. Unset ourselves. 
- */ - memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL); - qemu_unregister_reset(virtio_mem_system_reset, vmem); + qemu_unregister_resettable(OBJECT(vmem->system_reset)); + object_unref(OBJECT(vmem->system_reset)); + if (vmem->early_migration) { vmstate_unregister(VMSTATE_IF(vmem), &vmstate_virtio_mem_device_early, vmem); @@ -1153,6 +1153,11 @@ static void virtio_mem_device_unrealize(DeviceState *dev) virtio_del_queue(vdev, 0); virtio_cleanup(vdev); g_free(vmem->bitmap); + /* + * The unplug handler unmapped the memory region, it cannot be + * found via an address space anymore. Unset ourselves. + */ + memory_region_set_ram_discard_manager(&vmem->memdev->mr, NULL); ram_block_coordinated_discard_require(false); } @@ -1682,7 +1687,7 @@ static void virtio_mem_instance_finalize(Object *obj) vmem->mr = NULL; } -static Property virtio_mem_properties[] = { +static const Property virtio_mem_properties[] = { DEFINE_PROP_UINT64(VIRTIO_MEM_ADDR_PROP, VirtIOMEM, addr, 0), DEFINE_PROP_UINT32(VIRTIO_MEM_NODE_PROP, VirtIOMEM, node, 0), DEFINE_PROP_BOOL(VIRTIO_MEM_PREALLOC_PROP, VirtIOMEM, prealloc, false), @@ -1696,7 +1701,6 @@ static Property virtio_mem_properties[] = { early_migration, true), DEFINE_PROP_BOOL(VIRTIO_MEM_DYNAMIC_MEMSLOTS_PROP, VirtIOMEM, dynamic_memslots, false), - DEFINE_PROP_END_OF_LIST(), }; static uint64_t virtio_mem_rdm_get_min_granularity(const RamDiscardManager *rdm, @@ -1728,7 +1732,7 @@ static bool virtio_mem_rdm_is_populated(const RamDiscardManager *rdm, } struct VirtIOMEMReplayData { - void *fn; + ReplayRamDiscardState fn; void *opaque; }; @@ -1736,12 +1740,12 @@ static int virtio_mem_rdm_replay_populated_cb(MemoryRegionSection *s, void *arg) { struct VirtIOMEMReplayData *data = arg; - return ((ReplayRamPopulate)data->fn)(s, data->opaque); + return data->fn(s, data->opaque); } static int virtio_mem_rdm_replay_populated(const RamDiscardManager *rdm, MemoryRegionSection *s, - ReplayRamPopulate replay_fn, + ReplayRamDiscardState replay_fn, void *opaque) { const VirtIOMEM *vmem = VIRTIO_MEM(rdm); @@ -1760,14 +1764,13 @@ static int virtio_mem_rdm_replay_discarded_cb(MemoryRegionSection *s, { struct VirtIOMEMReplayData *data = arg; - ((ReplayRamDiscard)data->fn)(s, data->opaque); - return 0; + return data->fn(s, data->opaque); } -static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm, - MemoryRegionSection *s, - ReplayRamDiscard replay_fn, - void *opaque) +static int virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm, + MemoryRegionSection *s, + ReplayRamDiscardState replay_fn, + void *opaque) { const VirtIOMEM *vmem = VIRTIO_MEM(rdm); struct VirtIOMEMReplayData data = { @@ -1776,8 +1779,8 @@ static void virtio_mem_rdm_replay_discarded(const RamDiscardManager *rdm, }; g_assert(s->mr == &vmem->memdev->mr); - virtio_mem_for_each_unplugged_section(vmem, s, &data, - virtio_mem_rdm_replay_discarded_cb); + return virtio_mem_for_each_unplugged_section(vmem, s, &data, + virtio_mem_rdm_replay_discarded_cb); } static void virtio_mem_rdm_register_listener(RamDiscardManager *rdm, @@ -1832,8 +1835,8 @@ static void virtio_mem_unplug_request_check(VirtIOMEM *vmem, Error **errp) } if (vmem->size) { - error_setg(errp, "virtio-mem device cannot get unplugged while" - " '" VIRTIO_MEM_SIZE_PROP "' != '0'"); + error_setg(errp, "virtio-mem device cannot get unplugged while some" + " of its memory is still plugged"); return; } if (vmem->requested_size) { @@ -1843,7 +1846,7 @@ static void virtio_mem_unplug_request_check(VirtIOMEM *vmem, Error **errp) } } -static 
void virtio_mem_class_init(ObjectClass *klass, void *data) +static void virtio_mem_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); @@ -1885,7 +1888,7 @@ static const TypeInfo virtio_mem_info = { .instance_finalize = virtio_mem_instance_finalize, .class_init = virtio_mem_class_init, .class_size = sizeof(VirtIOMEMClass), - .interfaces = (InterfaceInfo[]) { + .interfaces = (const InterfaceInfo[]) { { TYPE_RAM_DISCARD_MANAGER }, { } }, @@ -1897,3 +1900,49 @@ static void virtio_register_types(void) } type_init(virtio_register_types) + +OBJECT_DEFINE_SIMPLE_TYPE_WITH_INTERFACES(VirtioMemSystemReset, virtio_mem_system_reset, VIRTIO_MEM_SYSTEM_RESET, OBJECT, { TYPE_RESETTABLE_INTERFACE }, { }) + +static void virtio_mem_system_reset_init(Object *obj) +{ +} + +static void virtio_mem_system_reset_finalize(Object *obj) +{ +} + +static ResettableState *virtio_mem_system_reset_get_state(Object *obj) +{ + VirtioMemSystemReset *vmem_reset = VIRTIO_MEM_SYSTEM_RESET(obj); + + return &vmem_reset->reset_state; +} + +static void virtio_mem_system_reset_hold(Object *obj, ResetType type) +{ + VirtioMemSystemReset *vmem_reset = VIRTIO_MEM_SYSTEM_RESET(obj); + VirtIOMEM *vmem = vmem_reset->vmem; + + /* + * When waking up from standby/suspend-to-ram, do not unplug any memory. + */ + if (type == RESET_TYPE_WAKEUP) { + return; + } + + /* + * During usual resets, we will unplug all memory and shrink the usable + * region size. This is, however, not possible in all scenarios. Then, + * the guest has to deal with this manually (VIRTIO_MEM_REQ_UNPLUG_ALL). + */ + virtio_mem_unplug_all(vmem); +} + +static void virtio_mem_system_reset_class_init(ObjectClass *klass, + const void *data) +{ + ResettableClass *rc = RESETTABLE_CLASS(klass); + + rc->get_state = virtio_mem_system_reset_get_state; + rc->phases.hold = virtio_mem_system_reset_hold; +} diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c index 22f9fbc..532c671 100644 --- a/hw/virtio/virtio-mmio.c +++ b/hw/virtio/virtio-mmio.c @@ -28,8 +28,8 @@ #include "migration/qemu-file-types.h" #include "qemu/host-utils.h" #include "qemu/module.h" -#include "sysemu/kvm.h" -#include "sysemu/replay.h" +#include "system/kvm.h" +#include "system/replay.h" #include "hw/virtio/virtio-mmio.h" #include "qemu/error-report.h" #include "qemu/log.h" @@ -248,6 +248,7 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value, { VirtIOMMIOProxy *proxy = (VirtIOMMIOProxy *)opaque; VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + uint16_t vq_idx; trace_virtio_mmio_write_offset(offset, value); @@ -407,8 +408,14 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value, } break; case VIRTIO_MMIO_QUEUE_NOTIFY: - if (value < VIRTIO_QUEUE_MAX) { - virtio_queue_notify(vdev, value); + vq_idx = value; + if (vq_idx < VIRTIO_QUEUE_MAX && virtio_queue_get_num(vdev, vq_idx)) { + if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFICATION_DATA)) { + VirtQueue *vq = virtio_get_queue(vdev, vq_idx); + + virtio_queue_set_shadow_avail_idx(vq, (value >> 16) & 0xFFFF); + } + virtio_queue_notify(vdev, vq_idx); } break; case VIRTIO_MMIO_INTERRUPT_ACK: @@ -744,13 +751,12 @@ static void virtio_mmio_pre_plugged(DeviceState *d, Error **errp) /* virtio-mmio device */ -static Property virtio_mmio_properties[] = { +static const Property virtio_mmio_properties[] = { DEFINE_PROP_BOOL("format_transport_address", VirtIOMMIOProxy, format_transport_address, true), 
DEFINE_PROP_BOOL("force-legacy", VirtIOMMIOProxy, legacy, true), DEFINE_PROP_BIT("ioeventfd", VirtIOMMIOProxy, flags, VIRTIO_IOMMIO_FLAG_USE_IOEVENTFD_BIT, true), - DEFINE_PROP_END_OF_LIST(), }; static void virtio_mmio_realizefn(DeviceState *d, Error **errp) @@ -778,12 +784,12 @@ static void virtio_mmio_realizefn(DeviceState *d, Error **errp) sysbus_init_mmio(sbd, &proxy->iomem); } -static void virtio_mmio_class_init(ObjectClass *klass, void *data) +static void virtio_mmio_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); dc->realize = virtio_mmio_realizefn; - dc->reset = virtio_mmio_reset; + device_class_set_legacy_reset(dc, virtio_mmio_reset); set_bit(DEVICE_CATEGORY_MISC, dc->categories); device_class_set_props(dc, virtio_mmio_properties); } @@ -849,7 +855,7 @@ static void virtio_mmio_vmstate_change(DeviceState *d, bool running) } } -static void virtio_mmio_bus_class_init(ObjectClass *klass, void *data) +static void virtio_mmio_bus_class_init(ObjectClass *klass, const void *data) { BusClass *bus_class = BUS_CLASS(klass); VirtioBusClass *k = VIRTIO_BUS_CLASS(klass); diff --git a/hw/virtio/virtio-net-pci.c b/hw/virtio/virtio-net-pci.c index e03543a..f857a84 100644 --- a/hw/virtio/virtio-net-pci.c +++ b/hw/virtio/virtio-net-pci.c @@ -38,12 +38,11 @@ struct VirtIONetPCI { VirtIONet vdev; }; -static Property virtio_net_properties[] = { +static const Property virtio_net_properties[] = { DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true), DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, DEV_NVECTORS_UNSPECIFIED), - DEFINE_PROP_END_OF_LIST(), }; static void virtio_net_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) @@ -64,7 +63,7 @@ static void virtio_net_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) qdev_realize(vdev, BUS(&vpci_dev->bus), errp); } -static void virtio_net_pci_class_init(ObjectClass *klass, void *data) +static void virtio_net_pci_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); @@ -75,6 +74,7 @@ static void virtio_net_pci_class_init(ObjectClass *klass, void *data) k->device_id = PCI_DEVICE_ID_VIRTIO_NET; k->revision = VIRTIO_PCI_ABI_VERSION; k->class_id = PCI_CLASS_NETWORK_ETHERNET; + k->sriov_vf_user_creatable = true; set_bit(DEVICE_CATEGORY_NETWORK, dc->categories); device_class_set_props(dc, virtio_net_properties); vpciklass->realize = virtio_net_pci_realize; diff --git a/hw/virtio/virtio-nsm-pci.c b/hw/virtio/virtio-nsm-pci.c new file mode 100644 index 0000000..ec24396 --- /dev/null +++ b/hw/virtio/virtio-nsm-pci.c @@ -0,0 +1,73 @@ +/* + * AWS Nitro Secure Module (NSM) device + * + * Copyright (c) 2024 Dorjoy Chowdhury <dorjoychy111@gmail.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. 
+ */ + +#include "qemu/osdep.h" + +#include "hw/virtio/virtio-pci.h" +#include "hw/virtio/virtio-nsm.h" +#include "hw/qdev-properties.h" +#include "qapi/error.h" +#include "qemu/module.h" +#include "qom/object.h" + +typedef struct VirtIONsmPCI VirtIONsmPCI; + +#define TYPE_VIRTIO_NSM_PCI "virtio-nsm-pci-base" +DECLARE_INSTANCE_CHECKER(VirtIONsmPCI, VIRTIO_NSM_PCI, + TYPE_VIRTIO_NSM_PCI) + +struct VirtIONsmPCI { + VirtIOPCIProxy parent_obj; + VirtIONSM vdev; +}; + +static void virtio_nsm_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +{ + VirtIONsmPCI *vnsm = VIRTIO_NSM_PCI(vpci_dev); + DeviceState *vdev = DEVICE(&vnsm->vdev); + + virtio_pci_force_virtio_1(vpci_dev); + + if (!qdev_realize(vdev, BUS(&vpci_dev->bus), errp)) { + return; + } +} + +static void virtio_nsm_pci_class_init(ObjectClass *klass, const void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + + k->realize = virtio_nsm_pci_realize; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); +} + +static void virtio_nsm_initfn(Object *obj) +{ + VirtIONsmPCI *dev = VIRTIO_NSM_PCI(obj); + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VIRTIO_NSM); +} + +static const VirtioPCIDeviceTypeInfo virtio_nsm_pci_info = { + .base_name = TYPE_VIRTIO_NSM_PCI, + .generic_name = "virtio-nsm-pci", + .instance_size = sizeof(VirtIONsmPCI), + .instance_init = virtio_nsm_initfn, + .class_init = virtio_nsm_pci_class_init, +}; + +static void virtio_nsm_pci_register(void) +{ + virtio_pci_types_register(&virtio_nsm_pci_info); +} + +type_init(virtio_nsm_pci_register) diff --git a/hw/virtio/virtio-nsm.c b/hw/virtio/virtio-nsm.c new file mode 100644 index 0000000..3bf5e70 --- /dev/null +++ b/hw/virtio/virtio-nsm.c @@ -0,0 +1,1737 @@ +/* + * AWS Nitro Secure Module (NSM) device + * + * Copyright (c) 2024 Dorjoy Chowdhury <dorjoychy111@gmail.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * (at your option) any later version. See the COPYING file in the + * top-level directory. 
+ */ + +#include "qemu/osdep.h" +#include "qemu/iov.h" +#include "qemu/guest-random.h" +#include "qapi/error.h" + +#include "crypto/hash.h" +#include "hw/virtio/virtio.h" +#include "hw/virtio/virtio-nsm.h" +#include "hw/virtio/cbor-helpers.h" +#include "standard-headers/linux/virtio_ids.h" + +#define NSM_REQUEST_MAX_SIZE 0x1000 +#define NSM_RESPONSE_BUF_SIZE 0x3000 +#define NSM_RND_BUF_SIZE 256 + +enum NSMResponseTypes { + NSM_SUCCESS = 0, + NSM_INVALID_ARGUMENT = 1, + NSM_INVALID_INDEX = 2, + NSM_READONLY_INDEX = 3, + NSM_INVALID_OPERATION = 4, + NSM_BUFFER_TOO_SMALL = 5, + NSM_INPUT_TOO_LARGE = 6, + NSM_INTERNAL_ERROR = 7, +}; + +static const char *error_string(enum NSMResponseTypes type) +{ + const char *str; + switch (type) { + case NSM_INVALID_ARGUMENT: + str = "InvalidArgument"; + break; + case NSM_INVALID_INDEX: + str = "InvalidIndex"; + break; + case NSM_READONLY_INDEX: + str = "ReadOnlyIndex"; + break; + case NSM_INVALID_OPERATION: + str = "InvalidOperation"; + break; + case NSM_BUFFER_TOO_SMALL: + str = "BufferTooSmall"; + break; + case NSM_INPUT_TOO_LARGE: + str = "InputTooLarge"; + break; + default: + str = "InternalError"; + break; + } + + return str; +} + +/* + * Error response structure: + * + * { + * Map(1) { + * key = String("Error"), + * value = String(error_name) + * } + * } + * + * where error_name can be one of the following: + * InvalidArgument + * InvalidIndex + * InvalidResponse + * ReadOnlyIndex + * InvalidOperation + * BufferTooSmall + * InputTooLarge + * InternalError + */ + +static bool error_response(struct iovec *response, enum NSMResponseTypes error, + Error **errp) +{ + cbor_item_t *root; + size_t len; + bool r = false; + + root = cbor_new_definite_map(1); + if (!root) { + goto err; + } + + if (!qemu_cbor_add_string_to_map(root, "Error", error_string(error))) { + goto err; + } + + len = cbor_serialize(root, response->iov_base, response->iov_len); + if (len == 0) { + error_setg(errp, "Response buffer is small for %s response", + error_string(error)); + goto out; + } + response->iov_len = len; + r = true; + + out: + if (root) { + cbor_decref(&root); + } + return r; + + err: + error_setg(errp, "Failed to initialize %s response", error_string(error)); + goto out; +} + +/* + * GetRandom response structure: + * + * { + * Map(1) { + * key = String("GetRandom"), + * value = Map(1) { + * key = String("random"), + * value = Byte_String() + * } + * } + * } + */ +static bool handle_get_random(VirtIONSM *vnsm, struct iovec *request, + struct iovec *response, Error **errp) +{ + cbor_item_t *root, *nested_map; + size_t len; + uint8_t rnd[NSM_RND_BUF_SIZE]; + bool r = false; + + root = cbor_new_definite_map(1); + if (!root) { + goto err; + } + + if (!qemu_cbor_add_map_to_map(root, "GetRandom", 1, &nested_map)) { + goto err; + } + + qemu_guest_getrandom_nofail(rnd, NSM_RND_BUF_SIZE); + + if (!qemu_cbor_add_bytestring_to_map(nested_map, "random", rnd, + NSM_RND_BUF_SIZE)) { + goto err; + } + + len = cbor_serialize(root, response->iov_base, response->iov_len); + if (len == 0) { + if (error_response(response, NSM_INPUT_TOO_LARGE, errp)) { + r = true; + } + goto out; + } + + response->iov_len = len; + r = true; + + out: + if (root) { + cbor_decref(&root); + } + return r; + + err: + error_setg(errp, "Failed to initialize GetRandom response"); + goto out; +} + +/* + * DescribeNSM response structure: + * + * { + * Map(1) { + * key = String("DescribeNSM"), + * value = Map(7) { + * key = String("digest"), + * value = String("SHA384"), + * key = String("max_pcrs"), + * value = 
Uint8(32), + * key = String("module_id"), + * value = String("i-1234-enc5678"), + * key = String("locked_pcrs"), + * value = Array<Uint8>(), + * key = String("version_major"), + * value = Uint8(1), + * key = String("version_minor"), + * value = Uint8(0), + * key = String("version_patch"), + * value = Uint8(0) + * } + * } + * } + */ +static bool handle_describe_nsm(VirtIONSM *vnsm, struct iovec *request, + struct iovec *response, Error **errp) +{ + cbor_item_t *root, *nested_map; + uint16_t locked_pcrs_cnt = 0; + uint8_t locked_pcrs_ind[NSM_MAX_PCRS]; + size_t len; + bool r = false; + + root = cbor_new_definite_map(1); + if (!root) { + goto err; + } + + if (!qemu_cbor_add_map_to_map(root, "DescribeNSM", 7, &nested_map)) { + goto err; + } + + if (!qemu_cbor_add_string_to_map(nested_map, "digest", vnsm->digest)) { + goto err; + } + + if (!qemu_cbor_add_uint8_to_map(nested_map, "max_pcrs", vnsm->max_pcrs)) { + goto err; + } + + if (!qemu_cbor_add_string_to_map(nested_map, "module_id", + vnsm->module_id)) { + goto err; + } + + for (uint8_t i = 0; i < NSM_MAX_PCRS; ++i) { + if (vnsm->pcrs[i].locked) { + locked_pcrs_ind[locked_pcrs_cnt++] = i; + } + } + if (!qemu_cbor_add_uint8_array_to_map(nested_map, "locked_pcrs", + locked_pcrs_ind, locked_pcrs_cnt)) { + goto err; + } + + if (!qemu_cbor_add_uint8_to_map(nested_map, "version_major", + vnsm->version_major)) { + goto err; + } + + if (!qemu_cbor_add_uint8_to_map(nested_map, "version_minor", + vnsm->version_minor)) { + goto err; + } + + if (!qemu_cbor_add_uint8_to_map(nested_map, "version_patch", + vnsm->version_patch)) { + goto err; + } + + len = cbor_serialize(root, response->iov_base, response->iov_len); + if (len == 0) { + if (error_response(response, NSM_INPUT_TOO_LARGE, errp)) { + r = true; + } + goto out; + } + + response->iov_len = len; + r = true; + + out: + if (root) { + cbor_decref(&root); + } + return r; + + err: + error_setg(errp, "Failed to initialize DescribeNSM response"); + goto out; +} + +/* + * DescribePCR request structure: + * + * { + * Map(1) { + * key = String("DescribePCR"), + * value = Map(1) { + * key = String("index"), + * value = Uint8(pcr) + * } + * } + * } + */ +typedef struct NSMDescribePCRReq { + uint8_t index; +} NSMDescribePCRReq; + +static enum NSMResponseTypes get_nsm_describe_pcr_req( + uint8_t *req, size_t len, + NSMDescribePCRReq *nsm_req) +{ + size_t size; + uint8_t *str; + struct cbor_pair *pair; + cbor_item_t *item = NULL; + struct cbor_load_result result; + enum NSMResponseTypes r = NSM_INVALID_OPERATION; + + item = cbor_load(req, len, &result); + if (!item || result.error.code != CBOR_ERR_NONE) { + goto cleanup; + } + + pair = cbor_map_handle(item); + if (!cbor_isa_map(pair->value)) { + goto cleanup; + } + size = cbor_map_size(pair->value); + if (size < 1) { + goto cleanup; + } + + pair = cbor_map_handle(pair->value); + for (int i = 0; i < size; ++i) { + if (!cbor_isa_string(pair[i].key)) { + continue; + } + + str = cbor_string_handle(pair[i].key); + if (str && cbor_string_length(pair[i].key) == 5 && + memcmp(str, "index", 5) == 0) { + if (!cbor_isa_uint(pair[i].value) || + cbor_int_get_width(pair[i].value) != CBOR_INT_8) { + break; + } + + nsm_req->index = cbor_get_uint8(pair[i].value); + r = NSM_SUCCESS; + break; + } + } + + cleanup: + if (item) { + cbor_decref(&item); + } + return r; +} + +/* + * DescribePCR response structure: + * + * { + * Map(1) { + * key = String("DescribePCR"), + * value = Map(2) { + * key = String("data"), + * value = Byte_String(), + * key = String("lock"), + * value = Bool() + 
* } + * } + * } + */ +static bool handle_describe_pcr(VirtIONSM *vnsm, struct iovec *request, + struct iovec *response, Error **errp) +{ + cbor_item_t *root = NULL; + cbor_item_t *nested_map; + size_t len; + NSMDescribePCRReq nsm_req; + enum NSMResponseTypes type; + struct PCRInfo *pcr; + bool r = false; + + type = get_nsm_describe_pcr_req(request->iov_base, request->iov_len, + &nsm_req); + if (type != NSM_SUCCESS) { + if (error_response(response, type, errp)) { + r = true; + } + goto out; + } + if (nsm_req.index >= vnsm->max_pcrs) { + if (error_response(response, NSM_INVALID_INDEX, errp)) { + r = true; + } + goto out; + } + pcr = &(vnsm->pcrs[nsm_req.index]); + + root = cbor_new_definite_map(1); + if (!root) { + goto err; + } + + if (!qemu_cbor_add_map_to_map(root, "DescribePCR", 2, &nested_map)) { + goto err; + } + + if (!qemu_cbor_add_bytestring_to_map(nested_map, "data", pcr->data, + QCRYPTO_HASH_DIGEST_LEN_SHA384)) { + goto err; + } + + if (!qemu_cbor_add_bool_to_map(nested_map, "lock", pcr->locked)) { + goto err; + } + + len = cbor_serialize(root, response->iov_base, response->iov_len); + if (len == 0) { + if (error_response(response, NSM_INPUT_TOO_LARGE, errp)) { + r = true; + } + goto out; + } + + response->iov_len = len; + r = true; + + out: + if (root) { + cbor_decref(&root); + } + return r; + + err: + error_setg(errp, "Failed to initialize DescribePCR response"); + goto out; +} + +/* + * ExtendPCR request structure: + * + * { + * Map(1) { + * key = String("ExtendPCR"), + * value = Map(2) { + * key = String("index"), + * value = Uint8(pcr), + * key = String("data"), + * value = Byte_String(data) || String(data), + * } + * } + * } + */ +typedef struct NSMExtendPCRReq { + uint8_t index; + uint16_t data_len; + uint8_t data[NSM_REQUEST_MAX_SIZE]; +} NSMExtendPCRReq; + +static enum NSMResponseTypes get_nsm_extend_pcr_req(uint8_t *req, size_t len, + NSMExtendPCRReq *nsm_req) +{ + cbor_item_t *item = NULL; + size_t size ; + uint8_t *str; + bool index_found = false; + bool data_found = false; + struct cbor_pair *pair; + struct cbor_load_result result; + enum NSMResponseTypes r = NSM_INVALID_OPERATION; + + item = cbor_load(req, len, &result); + if (!item || result.error.code != CBOR_ERR_NONE) { + goto cleanup; + } + + pair = cbor_map_handle(item); + if (!cbor_isa_map(pair->value)) { + goto cleanup; + } + size = cbor_map_size(pair->value); + if (size < 2) { + goto cleanup; + } + + pair = cbor_map_handle(pair->value); + for (int i = 0; i < size; ++i) { + if (!cbor_isa_string(pair[i].key)) { + continue; + } + str = cbor_string_handle(pair[i].key); + if (!str) { + continue; + } + + if (cbor_string_length(pair[i].key) == 5 && + memcmp(str, "index", 5) == 0) { + if (!cbor_isa_uint(pair[i].value) || + cbor_int_get_width(pair[i].value) != CBOR_INT_8) { + goto cleanup; + } + nsm_req->index = cbor_get_uint8(pair[i].value); + index_found = true; + continue; + } + + if (cbor_string_length(pair[i].key) == 4 && + memcmp(str, "data", 4) == 0) { + if (cbor_isa_bytestring(pair[i].value)) { + str = cbor_bytestring_handle(pair[i].value); + if (!str) { + goto cleanup; + } + nsm_req->data_len = cbor_bytestring_length(pair[i].value); + } else if (cbor_isa_string(pair[i].value)) { + str = cbor_string_handle(pair[i].value); + if (!str) { + goto cleanup; + } + nsm_req->data_len = cbor_string_length(pair[i].value); + } else { + goto cleanup; + } + /* + * nsm_req->data_len will be smaller than NSM_REQUEST_MAX_SIZE as + * we already check for the max request size before processing + * any request. 
So it's safe to copy.
+ */
+ memcpy(nsm_req->data, str, nsm_req->data_len);
+ data_found = true;
+ continue;
+ }
+ }
+
+ if (index_found && data_found) {
+ r = NSM_SUCCESS;
+ }
+
+ cleanup:
+ if (item) {
+ cbor_decref(&item);
+ }
+ return r;
+}
+
+/*
+ * ExtendPCR response structure:
+ *
+ * {
+ * Map(1) {
+ * key = String("ExtendPCR"),
+ * value = Map(1) {
+ * key = String("data"),
+ * value = Byte_String()
+ * }
+ * }
+ * }
+ */
+static bool handle_extend_pcr(VirtIONSM *vnsm, struct iovec *request,
+ struct iovec *response, Error **errp)
+{
+ cbor_item_t *root = NULL;
+ cbor_item_t *nested_map;
+ size_t len;
+ struct PCRInfo *pcr;
+ enum NSMResponseTypes type;
+ bool r = false;
+ g_autofree NSMExtendPCRReq *nsm_req = g_malloc(sizeof(NSMExtendPCRReq));
+
+ type = get_nsm_extend_pcr_req(request->iov_base, request->iov_len,
+ nsm_req);
+ if (type != NSM_SUCCESS) {
+ if (error_response(response, type, errp)) {
+ r = true;
+ }
+ goto out;
+ }
+ if (nsm_req->index >= vnsm->max_pcrs) {
+ if (error_response(response, NSM_INVALID_INDEX, errp)) {
+ r = true;
+ }
+ goto out;
+ }
+
+ pcr = &(vnsm->pcrs[nsm_req->index]);
+
+ if (pcr->locked) {
+ if (error_response(response, NSM_READONLY_INDEX, errp)) {
+ r = true;
+ }
+ goto out;
+ }
+
+ if (!vnsm->extend_pcr(vnsm, nsm_req->index, nsm_req->data,
+ nsm_req->data_len)) {
+ if (error_response(response, NSM_INTERNAL_ERROR, errp)) {
+ r = true;
+ }
+ goto out;
+ }
+
+ root = cbor_new_definite_map(1);
+ if (!root) {
+ goto err;
+ }
+
+ if (!qemu_cbor_add_map_to_map(root, "ExtendPCR", 1, &nested_map)) {
+ goto err;
+ }
+
+ if (!qemu_cbor_add_bytestring_to_map(nested_map, "data", pcr->data,
+ QCRYPTO_HASH_DIGEST_LEN_SHA384)) {
+ goto err;
+ }
+
+ len = cbor_serialize(root, response->iov_base, response->iov_len);
+ if (len == 0) {
+ if (error_response(response, NSM_BUFFER_TOO_SMALL, errp)) {
+ r = true;
+ }
+ goto out;
+ }
+
+ response->iov_len = len;
+ r = true;
+
+ out:
+ if (root) {
+ cbor_decref(&root);
+ }
+ return r;
+
+ err:
+ error_setg(errp, "Failed to initialize ExtendPCR response");
+ goto out;
+}
+
+/*
+ * LockPCR request structure:
+ *
+ * {
+ * Map(1) {
+ * key = String("LockPCR"),
+ * value = Map(1) {
+ * key = String("index"),
+ * value = Uint8(pcr)
+ * }
+ * }
+ * }
+ */
+typedef struct NSMLockPCRReq {
+ uint8_t index;
+} NSMLockPCRReq;
+
+static enum NSMResponseTypes get_nsm_lock_pcr_req(uint8_t *req, size_t len,
+ NSMLockPCRReq *nsm_req)
+{
+ cbor_item_t *item = NULL;
+ size_t size;
+ uint8_t *str;
+ struct cbor_pair *pair;
+ struct cbor_load_result result;
+ enum NSMResponseTypes r = NSM_INVALID_OPERATION;
+
+ item = cbor_load(req, len, &result);
+ if (!item || result.error.code != CBOR_ERR_NONE) {
+ goto cleanup;
+ }
+
+ pair = cbor_map_handle(item);
+ if (!cbor_isa_map(pair->value)) {
+ goto cleanup;
+ }
+ size = cbor_map_size(pair->value);
+ if (size < 1) {
+ goto cleanup;
+ }
+
+ pair = cbor_map_handle(pair->value);
+ for (int i = 0; i < size; ++i) {
+ if (!cbor_isa_string(pair[i].key)) {
+ continue;
+ }
+ str = cbor_string_handle(pair[i].key);
+ if (str && cbor_string_length(pair[i].key) == 5 &&
+ memcmp(str, "index", 5) == 0) {
+ if (!cbor_isa_uint(pair[i].value) ||
+ cbor_int_get_width(pair[i].value) != CBOR_INT_8) {
+ break;
+ }
+
+ nsm_req->index = cbor_get_uint8(pair[i].value);
+ r = NSM_SUCCESS;
+ break;
+ }
+ }
+
+ cleanup:
+ if (item) {
+ cbor_decref(&item);
+ }
+ return r;
+}
+
+/*
+ * LockPCR success response structure:
+ * {
+ * String("LockPCR")
+ * }
+ */
+static bool handle_lock_pcr(VirtIONSM *vnsm, struct iovec
*request, + struct iovec *response, Error **errp) +{ + cbor_item_t *root = NULL; + size_t len; + NSMLockPCRReq nsm_req; + enum NSMResponseTypes type; + struct PCRInfo *pcr; + bool r = false; + + type = get_nsm_lock_pcr_req(request->iov_base, request->iov_len, &nsm_req); + if (type != NSM_SUCCESS) { + if (error_response(response, type, errp)) { + r = true; + } + goto cleanup; + } + if (nsm_req.index >= vnsm->max_pcrs) { + if (error_response(response, NSM_INVALID_INDEX, errp)) { + r = true; + } + goto cleanup; + } + + pcr = &(vnsm->pcrs[nsm_req.index]); + + if (pcr->locked) { + if (error_response(response, NSM_READONLY_INDEX, errp)) { + r = true; + } + goto cleanup; + } + + pcr->locked = true; + + root = cbor_build_string("LockPCR"); + if (!root) { + goto err; + } + + len = cbor_serialize(root, response->iov_base, response->iov_len); + if (len == 0) { + if (error_response(response, NSM_BUFFER_TOO_SMALL, errp)) { + r = true; + } + goto cleanup; + } + + response->iov_len = len; + r = true; + goto cleanup; + + err: + error_setg(errp, "Failed to initialize LockPCR response"); + + cleanup: + if (root) { + cbor_decref(&root); + } + return r; +} + +/* + * LockPCRs request structure: + * + * { + * Map(1) { + * key = String("LockPCRs"), + * value = Map(1) { + * key = String("range"), + * value = Uint8(pcr) + * } + * } + * } + */ +typedef struct NSMLockPCRsReq { + uint16_t range; +} NSMLockPCRsReq; + +static enum NSMResponseTypes get_nsm_lock_pcrs_req(uint8_t *req, size_t len, + NSMLockPCRsReq *nsm_req) +{ + cbor_item_t *item = NULL; + size_t size; + uint8_t *str; + struct cbor_pair *pair; + struct cbor_load_result result; + enum NSMResponseTypes r = NSM_INVALID_OPERATION; + + item = cbor_load(req, len, &result); + if (!item || result.error.code != CBOR_ERR_NONE) { + goto cleanup; + } + + pair = cbor_map_handle(item); + if (!cbor_isa_map(pair->value)) { + goto cleanup; + } + size = cbor_map_size(pair->value); + if (size < 1) { + goto cleanup; + } + + pair = cbor_map_handle(pair->value); + for (int i = 0; i < size; ++i) { + if (!cbor_isa_string(pair[i].key)) { + continue; + } + str = cbor_string_handle(pair[i].key); + if (str && cbor_string_length(pair[i].key) == 5 && + memcmp(str, "range", 5) == 0) { + if (!cbor_isa_uint(pair[i].value) || + cbor_int_get_width(pair[i].value) != CBOR_INT_8) { + break; + } + + nsm_req->range = cbor_get_uint8(pair[i].value); + r = NSM_SUCCESS; + break; + } + } + + cleanup: + if (item) { + cbor_decref(&item); + } + return r; +} + +/* + * LockPCRs success response structure: + * { + * String("LockPCRs") + * } + */ +static bool handle_lock_pcrs(VirtIONSM *vnsm, struct iovec *request, + struct iovec *response, Error **errp) +{ + cbor_item_t *root = NULL; + size_t len; + NSMLockPCRsReq nsm_req; + enum NSMResponseTypes type; + bool r = false; + + type = get_nsm_lock_pcrs_req(request->iov_base, request->iov_len, &nsm_req); + if (type != NSM_SUCCESS) { + if (error_response(response, type, errp)) { + r = true; + } + goto cleanup; + } + if (nsm_req.range > vnsm->max_pcrs) { + if (error_response(response, NSM_INVALID_INDEX, errp)) { + r = true; + } + goto cleanup; + } + + for (int i = 0; i < nsm_req.range; ++i) { + vnsm->pcrs[i].locked = true; + } + + root = cbor_build_string("LockPCRs"); + if (!root) { + goto err; + } + + len = cbor_serialize(root, response->iov_base, response->iov_len); + if (len == 0) { + if (error_response(response, NSM_BUFFER_TOO_SMALL, errp)) { + r = true; + } + goto cleanup; + } + + response->iov_len = len; + r = true; + goto cleanup; + + err: + 
error_setg(errp, "Failed to initialize response"); + + cleanup: + if (root) { + cbor_decref(&root); + } + return r; +} + +/* + * Attestation request structure: + * + * Map(1) { + * key = String("Attestation"), + * value = Map(3) { + * key = String("user_data"), + * value = Byte_String() || null, // Optional + * key = String("nonce"), + * value = Byte_String() || null, // Optional + * key = String("public_key"), + * value = Byte_String() || null, // Optional + * } + * } + * } + */ + +struct AttestationProperty { + bool is_null; /* True if property is not present in map or is null */ + uint16_t len; + uint8_t buf[NSM_REQUEST_MAX_SIZE]; +}; + +typedef struct NSMAttestationReq { + struct AttestationProperty public_key; + struct AttestationProperty user_data; + struct AttestationProperty nonce; +} NSMAttestationReq; + +static bool fill_attestation_property(struct AttestationProperty *prop, + cbor_item_t *value) +{ + uint8_t *str; + bool ret = false; + + if (cbor_is_null(value)) { + prop->is_null = true; + ret = true; + goto out; + } else if (cbor_isa_bytestring(value)) { + str = cbor_bytestring_handle(value); + if (!str) { + goto out; + } + prop->len = cbor_bytestring_length(value); + } else if (cbor_isa_string(value)) { + str = cbor_string_handle(value); + if (!str) { + goto out; + } + prop->len = cbor_string_length(value); + } else { + goto out; + } + + /* + * prop->len will be smaller than NSM_REQUEST_MAX_SIZE as we + * already check for the max request size before processing + * any request. So it's safe to copy. + */ + memcpy(prop->buf, str, prop->len); + prop->is_null = false; + ret = true; + + out: + return ret; +} + +static enum NSMResponseTypes get_nsm_attestation_req(uint8_t *req, size_t len, + NSMAttestationReq *nsm_req) +{ + cbor_item_t *item = NULL; + size_t size; + uint8_t *str; + struct cbor_pair *pair; + struct cbor_load_result result; + enum NSMResponseTypes r = NSM_INVALID_OPERATION; + + nsm_req->public_key.is_null = true; + nsm_req->user_data.is_null = true; + nsm_req->nonce.is_null = true; + + item = cbor_load(req, len, &result); + if (!item || result.error.code != CBOR_ERR_NONE) { + goto cleanup; + } + + pair = cbor_map_handle(item); + if (!cbor_isa_map(pair->value)) { + goto cleanup; + } + size = cbor_map_size(pair->value); + if (size == 0) { + r = NSM_SUCCESS; + goto cleanup; + } + + pair = cbor_map_handle(pair->value); + for (int i = 0; i < size; ++i) { + if (!cbor_isa_string(pair[i].key)) { + continue; + } + + str = cbor_string_handle(pair[i].key); + if (!str) { + continue; + } + + if (cbor_string_length(pair[i].key) == 10 && + memcmp(str, "public_key", 10) == 0) { + if (!fill_attestation_property(&(nsm_req->public_key), + pair[i].value)) { + goto cleanup; + } + continue; + } + + if (cbor_string_length(pair[i].key) == 9 && + memcmp(str, "user_data", 9) == 0) { + if (!fill_attestation_property(&(nsm_req->user_data), + pair[i].value)) { + goto cleanup; + } + continue; + } + + if (cbor_string_length(pair[i].key) == 5 && + memcmp(str, "nonce", 5) == 0) { + if (!fill_attestation_property(&(nsm_req->nonce), pair[i].value)) { + goto cleanup; + } + continue; + } + } + + r = NSM_SUCCESS; + + cleanup: + if (item) { + cbor_decref(&item); + } + return r; +} + +static bool add_protected_header_to_cose(cbor_item_t *cose) +{ + cbor_item_t *map = NULL; + cbor_item_t *key = NULL; + cbor_item_t *value = NULL; + cbor_item_t *bs = NULL; + size_t len; + bool r = false; + size_t buf_len = 4096; + g_autofree uint8_t *buf = g_malloc(buf_len); + + map = cbor_new_definite_map(1); + if (!map) { + 
goto cleanup; + } + key = cbor_build_uint8(1); + if (!key) { + goto cleanup; + } + value = cbor_new_int8(); + if (!value) { + goto cleanup; + } + cbor_mark_negint(value); + /* we don't actually sign the data, so we use -1 as the 'alg' value */ + cbor_set_uint8(value, 0); + + if (!qemu_cbor_map_add(map, key, value)) { + goto cleanup; + } + + len = cbor_serialize(map, buf, buf_len); + if (len == 0) { + goto cleanup_map; + } + + bs = cbor_build_bytestring(buf, len); + if (!bs) { + goto cleanup_map; + } + if (!qemu_cbor_array_push(cose, bs)) { + cbor_decref(&bs); + goto cleanup_map; + } + r = true; + goto cleanup_map; + + cleanup: + if (key) { + cbor_decref(&key); + } + if (value) { + cbor_decref(&value); + } + + cleanup_map: + if (map) { + cbor_decref(&map); + } + return r; +} + +static bool add_unprotected_header_to_cose(cbor_item_t *cose) +{ + cbor_item_t *map = cbor_new_definite_map(0); + if (!map) { + goto cleanup; + } + if (!qemu_cbor_array_push(cose, map)) { + goto cleanup; + } + + return true; + + cleanup: + if (map) { + cbor_decref(&map); + } + return false; +} + +static bool add_ca_bundle_to_payload(cbor_item_t *map) +{ + cbor_item_t *key_cbor = NULL; + cbor_item_t *value_cbor = NULL; + cbor_item_t *bs = NULL; + uint8_t zero[64] = {0}; + + key_cbor = cbor_build_string("cabundle"); + if (!key_cbor) { + goto cleanup; + } + value_cbor = cbor_new_definite_array(1); + if (!value_cbor) { + goto cleanup; + } + bs = cbor_build_bytestring(zero, 64); + if (!bs) { + goto cleanup; + } + if (!qemu_cbor_array_push(value_cbor, bs)) { + cbor_decref(&bs); + goto cleanup; + } + if (!qemu_cbor_map_add(map, key_cbor, value_cbor)) { + goto cleanup; + } + + return true; + + cleanup: + if (key_cbor) { + cbor_decref(&key_cbor); + } + if (value_cbor) { + cbor_decref(&value_cbor); + } + return false; +} + +static bool add_payload_to_cose(cbor_item_t *cose, VirtIONSM *vnsm, + NSMAttestationReq *req) +{ + cbor_item_t *root = NULL; + cbor_item_t *nested_map; + cbor_item_t *bs = NULL; + size_t locked_cnt; + uint8_t ind[NSM_MAX_PCRS]; + size_t payload_map_size = 9; + size_t len; + struct PCRInfo *pcr; + uint8_t zero[64] = {0}; + bool r = false; + size_t buf_len = 16384; + g_autofree uint8_t *buf = g_malloc(buf_len); + + root = cbor_new_definite_map(payload_map_size); + if (!root) { + goto cleanup; + } + if (!qemu_cbor_add_string_to_map(root, "module_id", vnsm->module_id)) { + goto cleanup; + } + if (!qemu_cbor_add_string_to_map(root, "digest", vnsm->digest)) { + goto cleanup; + } + if (!qemu_cbor_add_uint64_to_map(root, "timestamp", + (uint64_t) time(NULL) * 1000)) { + goto cleanup; + } + + locked_cnt = 0; + for (uint8_t i = 0; i < NSM_MAX_PCRS; ++i) { + if (vnsm->pcrs[i].locked) { + ind[locked_cnt++] = i; + } + } + if (!qemu_cbor_add_map_to_map(root, "pcrs", locked_cnt, &nested_map)) { + goto cleanup; + } + for (uint8_t i = 0; i < locked_cnt; ++i) { + pcr = &(vnsm->pcrs[ind[i]]); + if (!qemu_cbor_add_uint8_key_bytestring_to_map( + nested_map, ind[i], + pcr->data, + QCRYPTO_HASH_DIGEST_LEN_SHA384)) { + goto cleanup; + } + } + if (!qemu_cbor_add_bytestring_to_map(root, "certificate", zero, 64)) { + goto cleanup; + } + if (!add_ca_bundle_to_payload(root)) { + goto cleanup; + } + + if (req->public_key.is_null) { + if (!qemu_cbor_add_null_to_map(root, "public_key")) { + goto cleanup; + } + } else if (!qemu_cbor_add_bytestring_to_map(root, "public_key", + req->public_key.buf, + req->public_key.len)) { + goto cleanup; + } + + if (req->user_data.is_null) { + if (!qemu_cbor_add_null_to_map(root, "user_data")) { + goto 
cleanup; + } + } else if (!qemu_cbor_add_bytestring_to_map(root, "user_data", + req->user_data.buf, + req->user_data.len)) { + goto cleanup; + } + + if (req->nonce.is_null) { + if (!qemu_cbor_add_null_to_map(root, "nonce")) { + goto cleanup; + } + } else if (!qemu_cbor_add_bytestring_to_map(root, "nonce", + req->nonce.buf, + req->nonce.len)) { + goto cleanup; + } + + len = cbor_serialize(root, buf, buf_len); + if (len == 0) { + goto cleanup; + } + + bs = cbor_build_bytestring(buf, len); + if (!bs) { + goto cleanup; + } + if (!qemu_cbor_array_push(cose, bs)) { + cbor_decref(&bs); + goto cleanup; + } + + r = true; + + cleanup: + if (root) { + cbor_decref(&root); + } + return r; +} + +static bool add_signature_to_cose(cbor_item_t *cose) +{ + cbor_item_t *bs = NULL; + uint8_t zero[64] = {0}; + + /* we don't actually sign the data, so we just put 64 zero bytes */ + bs = cbor_build_bytestring(zero, 64); + if (!bs) { + goto cleanup; + } + + if (!qemu_cbor_array_push(cose, bs)) { + goto cleanup; + } + + return true; + + cleanup: + if (bs) { + cbor_decref(&bs); + } + return false; +} + +/* + * Attestation response structure: + * + * { + * Map(1) { + * key = String("Attestation"), + * value = Map(1) { + * key = String("document"), + * value = Byte_String() + * } + * } + * } + * + * The document is a serialized COSE sign1 blob of the structure: + * { + * Array(4) { + * [0] { ByteString() }, // serialized protected header + * [1] { Map(0) }, // 0 length map + * [2] { ByteString() }, // serialized payload + * [3] { ByteString() }, // signature + * } + * } + * + * where [0] protected header is a serialized CBOR blob of the structure: + * { + * Map(1) { + * key = Uint8(1) // alg + * value = NegativeInt8() // Signing algorithm + * } + * } + * + * [2] payload is serialized CBOR blob of the structure: + * { + * Map(9) { + * [0] { key = String("module_id"), value = String(module_id) }, + * [1] { key = String("digest"), value = String("SHA384") }, + * [2] { + * key = String("timestamp"), + * value = Uint64(unix epoch of when document was created) + * }, + * [3] { + * key = String("pcrs"), + * value = Map(locked_pcr_cnt) { + * key = Uint8(pcr_index), + * value = ByteString(pcr_data) + * }, + * }, + * [4] { + * key = String("certificate"), + * value = ByteString(Signing certificate) + * }, + * [5] { key = String("cabundle"), value = Array(N) { ByteString()... 
} }, + * [6] { key = String("public_key"), value = ByteString() || null }, + * [7] { key = String("user_data"), value = ByteString() || null}, + * [8] { key = String("nonce"), value = ByteString() || null}, + * } + * } + */ +static bool handle_attestation(VirtIONSM *vnsm, struct iovec *request, + struct iovec *response, Error **errp) +{ + cbor_item_t *root = NULL; + cbor_item_t *cose = NULL; + cbor_item_t *nested_map; + size_t len; + enum NSMResponseTypes type; + bool r = false; + size_t buf_len = 16384; + g_autofree uint8_t *buf = g_malloc(buf_len); + g_autofree NSMAttestationReq *nsm_req = g_malloc(sizeof(NSMAttestationReq)); + + nsm_req->public_key.is_null = true; + nsm_req->user_data.is_null = true; + nsm_req->nonce.is_null = true; + + type = get_nsm_attestation_req(request->iov_base, request->iov_len, + nsm_req); + if (type != NSM_SUCCESS) { + if (error_response(response, type, errp)) { + r = true; + } + goto out; + } + + cose = cbor_new_definite_array(4); + if (!cose) { + goto err; + } + if (!add_protected_header_to_cose(cose)) { + goto err; + } + if (!add_unprotected_header_to_cose(cose)) { + goto err; + } + if (!add_payload_to_cose(cose, vnsm, nsm_req)) { + goto err; + } + if (!add_signature_to_cose(cose)) { + goto err; + } + + len = cbor_serialize(cose, buf, buf_len); + if (len == 0) { + goto err; + } + + root = cbor_new_definite_map(1); + if (!root) { + goto err; + } + if (!qemu_cbor_add_map_to_map(root, "Attestation", 1, &nested_map)) { + goto err; + } + if (!qemu_cbor_add_bytestring_to_map(nested_map, "document", buf, len)) { + goto err; + } + + len = cbor_serialize(root, response->iov_base, response->iov_len); + if (len == 0) { + if (error_response(response, NSM_INPUT_TOO_LARGE, errp)) { + r = true; + } + goto out; + } + + response->iov_len = len; + r = true; + + out: + if (root) { + cbor_decref(&root); + } + if (cose) { + cbor_decref(&cose); + } + return r; + + err: + error_setg(errp, "Failed to initialize Attestation response"); + goto out; +} + +enum CBOR_ROOT_TYPE { + CBOR_ROOT_TYPE_STRING = 0, + CBOR_ROOT_TYPE_MAP = 1, +}; + +struct nsm_cmd { + char name[16]; + /* + * There are 2 types of request + * 1) String(); "GetRandom", "DescribeNSM" + * 2) Map(1) { key: String(), value: ... 
} + */ + enum CBOR_ROOT_TYPE root_type; + bool (*response_fn)(VirtIONSM *vnsm, struct iovec *request, + struct iovec *response, Error **errp); +}; + +const struct nsm_cmd nsm_cmds[] = { + { "GetRandom", CBOR_ROOT_TYPE_STRING, handle_get_random }, + { "DescribeNSM", CBOR_ROOT_TYPE_STRING, handle_describe_nsm }, + { "DescribePCR", CBOR_ROOT_TYPE_MAP, handle_describe_pcr }, + { "ExtendPCR", CBOR_ROOT_TYPE_MAP, handle_extend_pcr }, + { "LockPCR", CBOR_ROOT_TYPE_MAP, handle_lock_pcr }, + { "LockPCRs", CBOR_ROOT_TYPE_MAP, handle_lock_pcrs }, + { "Attestation", CBOR_ROOT_TYPE_MAP, handle_attestation }, +}; + +static const struct nsm_cmd *get_nsm_request_cmd(uint8_t *buf, size_t len) +{ + size_t size; + uint8_t *req; + enum CBOR_ROOT_TYPE root_type; + struct cbor_load_result result; + cbor_item_t *item = cbor_load(buf, len, &result); + if (!item || result.error.code != CBOR_ERR_NONE) { + goto cleanup; + } + + if (cbor_isa_string(item)) { + size = cbor_string_length(item); + req = cbor_string_handle(item); + root_type = CBOR_ROOT_TYPE_STRING; + } else if (cbor_isa_map(item) && cbor_map_size(item) == 1) { + struct cbor_pair *handle = cbor_map_handle(item); + if (cbor_isa_string(handle->key)) { + size = cbor_string_length(handle->key); + req = cbor_string_handle(handle->key); + root_type = CBOR_ROOT_TYPE_MAP; + } else { + goto cleanup; + } + } else { + goto cleanup; + } + + if (size == 0 || req == NULL) { + goto cleanup; + } + + for (int i = 0; i < ARRAY_SIZE(nsm_cmds); ++i) { + if (nsm_cmds[i].root_type == root_type && + strlen(nsm_cmds[i].name) == size && + memcmp(nsm_cmds[i].name, req, size) == 0) { + cbor_decref(&item); + return &nsm_cmds[i]; + } + } + + cleanup: + if (item) { + cbor_decref(&item); + } + return NULL; +} + +static bool get_nsm_request_response(VirtIONSM *vnsm, struct iovec *req, + struct iovec *resp, Error **errp) +{ + const struct nsm_cmd *cmd; + + if (req->iov_len > NSM_REQUEST_MAX_SIZE) { + if (error_response(resp, NSM_INPUT_TOO_LARGE, errp)) { + return true; + } + error_setg(errp, "Failed to initialize InputTooLarge response"); + return false; + } + + cmd = get_nsm_request_cmd(req->iov_base, req->iov_len); + + if (cmd == NULL) { + if (error_response(resp, NSM_INVALID_OPERATION, errp)) { + return true; + } + error_setg(errp, "Failed to initialize InvalidOperation response"); + return false; + } + + return cmd->response_fn(vnsm, req, resp, errp); +} + +static void handle_input(VirtIODevice *vdev, VirtQueue *vq) +{ + g_autofree VirtQueueElement *out_elem = NULL; + g_autofree VirtQueueElement *in_elem = NULL; + VirtIONSM *vnsm = VIRTIO_NSM(vdev); + Error *err = NULL; + size_t sz; + struct iovec req = {.iov_base = NULL, .iov_len = 0}; + struct iovec res = {.iov_base = NULL, .iov_len = 0}; + + out_elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); + if (!out_elem) { + /* nothing in virtqueue */ + return; + } + + sz = iov_size(out_elem->out_sg, out_elem->out_num); + if (sz == 0) { + virtio_error(vdev, "Expected non-zero sized request buffer in " + "virtqueue"); + goto cleanup; + } + + in_elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); + if (!in_elem) { + virtio_error(vdev, "Expected response buffer after request buffer " + "in virtqueue"); + goto cleanup; + } + if (iov_size(in_elem->in_sg, in_elem->in_num) != NSM_RESPONSE_BUF_SIZE) { + virtio_error(vdev, "Expected response buffer of length 0x3000"); + goto cleanup; + } + + req.iov_base = g_malloc(sz); + req.iov_len = iov_to_buf(out_elem->out_sg, out_elem->out_num, 0, + req.iov_base, sz); + if (req.iov_len != sz) { + 
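/*
 * Illustration (not part of the patch): get_nsm_request_cmd() above
 * accepts exactly two request encodings: a bare CBOR text string for
 * argument-less commands ("GetRandom", "DescribeNSM"), and a one-entry
 * map whose single key names the command for everything else.  A
 * hypothetical guest-side encoder for the simple form:
 */
static size_t build_get_random_req(uint8_t *out, size_t out_len)
{
    size_t len;
    cbor_item_t *root = cbor_build_string("GetRandom");

    if (!root) {
        return 0;
    }
    len = cbor_serialize(root, out, out_len); /* 0 if 'out' is too small */
    cbor_decref(&root);
    return len;
}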
virtio_error(vdev, "Failed to copy request buffer"); + goto cleanup; + } + + res.iov_base = g_malloc(NSM_RESPONSE_BUF_SIZE); + res.iov_len = NSM_RESPONSE_BUF_SIZE; + + if (!get_nsm_request_response(vnsm, &req, &res, &err)) { + error_report_err(err); + virtio_error(vdev, "Failed to get NSM request response"); + goto cleanup; + } + + sz = iov_from_buf(in_elem->in_sg, in_elem->in_num, 0, res.iov_base, + res.iov_len); + if (sz != res.iov_len) { + virtio_error(vdev, "Failed to copy response buffer"); + goto cleanup; + } + + g_free(req.iov_base); + g_free(res.iov_base); + virtqueue_push(vq, out_elem, 0); + virtqueue_push(vq, in_elem, sz); + virtio_notify(vdev, vq); + return; + + cleanup: + g_free(req.iov_base); + g_free(res.iov_base); + if (out_elem) { + virtqueue_detach_element(vq, out_elem, 0); + } + if (in_elem) { + virtqueue_detach_element(vq, in_elem, 0); + } +} + +static uint64_t get_features(VirtIODevice *vdev, uint64_t f, Error **errp) +{ + return f; +} + +static bool extend_pcr(VirtIONSM *vnsm, int ind, uint8_t *data, uint16_t len) +{ + Error *err = NULL; + struct PCRInfo *pcr = &(vnsm->pcrs[ind]); + size_t digest_len = QCRYPTO_HASH_DIGEST_LEN_SHA384; + uint8_t result[QCRYPTO_HASH_DIGEST_LEN_SHA384]; + uint8_t *ptr = result; + struct iovec iov[2] = { + { .iov_base = pcr->data, .iov_len = QCRYPTO_HASH_DIGEST_LEN_SHA384 }, + { .iov_base = data, .iov_len = len }, + }; + + if (qcrypto_hash_bytesv(QCRYPTO_HASH_ALGO_SHA384, iov, 2, &ptr, &digest_len, + &err) < 0) { + return false; + } + + memcpy(pcr->data, result, QCRYPTO_HASH_DIGEST_LEN_SHA384); + return true; +} + +static void lock_pcr(VirtIONSM *vnsm, int ind) +{ + vnsm->pcrs[ind].locked = true; +} + +static void virtio_nsm_device_realize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VirtIONSM *vnsm = VIRTIO_NSM(dev); + + vnsm->max_pcrs = NSM_MAX_PCRS; + vnsm->digest = (char *) "SHA384"; + if (vnsm->module_id == NULL) { + vnsm->module_id = (char *) "i-234-enc5678"; + } + vnsm->version_major = 1; + vnsm->version_minor = 0; + vnsm->version_patch = 0; + vnsm->extend_pcr = extend_pcr; + vnsm->lock_pcr = lock_pcr; + + virtio_init(vdev, VIRTIO_ID_NITRO_SEC_MOD, 0); + + vnsm->vq = virtio_add_queue(vdev, 2, handle_input); +} + +static void virtio_nsm_device_unrealize(DeviceState *dev) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + + virtio_del_queue(vdev, 0); + virtio_cleanup(vdev); +} + +static const VMStateDescription vmstate_pcr_info_entry = { + .name = "pcr_info_entry", + .minimum_version_id = 1, + .version_id = 1, + .fields = (const VMStateField[]) { + VMSTATE_BOOL(locked, struct PCRInfo), + VMSTATE_UINT8_ARRAY(data, struct PCRInfo, + QCRYPTO_HASH_DIGEST_LEN_SHA384), + VMSTATE_END_OF_LIST() + }, +}; + +static const VMStateDescription vmstate_virtio_nsm_device = { + .name = "virtio-nsm-device", + .minimum_version_id = 1, + .version_id = 1, + .fields = (const VMStateField[]) { + VMSTATE_STRUCT_ARRAY(pcrs, VirtIONSM, NSM_MAX_PCRS, 1, + vmstate_pcr_info_entry, struct PCRInfo), + VMSTATE_END_OF_LIST() + }, +}; + +static const VMStateDescription vmstate_virtio_nsm = { + .name = "virtio-nsm", + .minimum_version_id = 1, + .version_id = 1, + .fields = (const VMStateField[]) { + VMSTATE_VIRTIO_DEVICE, + VMSTATE_END_OF_LIST() + }, +}; + +static const Property virtio_nsm_properties[] = { + DEFINE_PROP_STRING("module-id", VirtIONSM, module_id), +}; + +static void virtio_nsm_class_init(ObjectClass *klass, const void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + 
+ device_class_set_props(dc, virtio_nsm_properties); + dc->vmsd = &vmstate_virtio_nsm; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + vdc->realize = virtio_nsm_device_realize; + vdc->unrealize = virtio_nsm_device_unrealize; + vdc->get_features = get_features; + vdc->vmsd = &vmstate_virtio_nsm_device; +} + +static const TypeInfo virtio_nsm_info = { + .name = TYPE_VIRTIO_NSM, + .parent = TYPE_VIRTIO_DEVICE, + .instance_size = sizeof(VirtIONSM), + .class_init = virtio_nsm_class_init, +}; + +static void virtio_register_types(void) +{ + type_register_static(&virtio_nsm_info); +} + +type_init(virtio_register_types) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index b1d02f4..fba2372 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -33,12 +33,12 @@ #include "hw/pci/msi.h" #include "hw/pci/msix.h" #include "hw/loader.h" -#include "sysemu/kvm.h" +#include "system/kvm.h" #include "hw/virtio/virtio-pci.h" #include "qemu/range.h" #include "hw/virtio/virtio-bus.h" #include "qapi/visitor.h" -#include "sysemu/replay.h" +#include "system/replay.h" #include "trace.h" #define VIRTIO_PCI_REGION_SIZE(dev) VIRTIO_PCI_CONFIG_OFF(msix_present(dev)) @@ -146,9 +146,7 @@ static const VMStateDescription vmstate_virtio_pci = { static bool virtio_pci_has_extra_state(DeviceState *d) { - VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); - - return proxy->flags & VIRTIO_PCI_FLAG_MIGRATE_EXTRA; + return true; } static void virtio_pci_save_extra_state(DeviceState *d, QEMUFile *f) @@ -384,7 +382,7 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val) { VirtIOPCIProxy *proxy = opaque; VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); - uint16_t vector; + uint16_t vector, vq_idx; hwaddr pa; switch (addr) { @@ -408,8 +406,14 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val) vdev->queue_sel = val; break; case VIRTIO_PCI_QUEUE_NOTIFY: - if (val < VIRTIO_QUEUE_MAX) { - virtio_queue_notify(vdev, val); + vq_idx = val; + if (vq_idx < VIRTIO_QUEUE_MAX && virtio_queue_get_num(vdev, vq_idx)) { + if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFICATION_DATA)) { + VirtQueue *vq = virtio_get_queue(vdev, vq_idx); + + virtio_queue_set_shadow_avail_idx(vq, val >> 16); + } + virtio_queue_notify(vdev, vq_idx); } break; case VIRTIO_PCI_STATUS: @@ -609,8 +613,12 @@ static MemoryRegion *virtio_address_space_lookup(VirtIOPCIProxy *proxy, reg = &proxy->regs[i]; if (*off >= reg->offset && *off + len <= reg->offset + reg->size) { - *off -= reg->offset; - return ®->mr; + MemoryRegionSection mrs = memory_region_find(®->mr, + *off - reg->offset, len); + assert(mrs.mr); + *off = mrs.offset_within_region; + memory_region_unref(mrs.mr); + return mrs.mr; } } @@ -860,6 +868,9 @@ static int virtio_pci_get_notifier(VirtIOPCIProxy *proxy, int queue_no, VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); VirtQueue *vq; + if (!proxy->vector_irqfd && vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) + return -1; + if (queue_no == VIRTIO_CONFIG_IRQ_IDX) { *n = virtio_config_get_guest_notifier(vdev); *vector = vdev->config_vector; @@ -892,7 +903,7 @@ static int kvm_virtio_pci_vector_use_one(VirtIOPCIProxy *proxy, int queue_no) } ret = kvm_virtio_pci_vq_vector_use(proxy, vector); if (ret < 0) { - goto undo; + return ret; } /* * If guest supports masking, set up irqfd now. 
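/*
 * Illustration (not part of the patch): with VIRTIO_F_NOTIFICATION_DATA
 * negotiated, the VIRTIO_PCI_QUEUE_NOTIFY write handled above carries
 * more than the queue number: the driver packs the queue index into the
 * low 16 bits and its avail index into the high 16 bits, which QEMU
 * passes to virtio_queue_set_shadow_avail_idx().  Hypothetical
 * driver-side encoding:
 */
static uint32_t encode_notify_data(uint16_t vq_idx, uint16_t avail_idx)
{
    /* For packed virtqueues, bit 15 of avail_idx is the wrap counter. */
    return ((uint32_t)avail_idx << 16) | vq_idx;
}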
@@ -902,25 +913,11 @@ static int kvm_virtio_pci_vector_use_one(VirtIOPCIProxy *proxy, int queue_no) ret = kvm_virtio_pci_irqfd_use(proxy, n, vector); if (ret < 0) { kvm_virtio_pci_vq_vector_release(proxy, vector); - goto undo; + return ret; } } return 0; -undo: - - vector = virtio_queue_vector(vdev, queue_no); - if (vector >= msix_nr_vectors_allocated(dev)) { - return ret; - } - if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { - ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); - if (ret < 0) { - return ret; - } - kvm_virtio_pci_irqfd_release(proxy, n, vector); - } - return ret; } static int kvm_virtio_pci_vector_vq_use(VirtIOPCIProxy *proxy, int nvqs) { @@ -1216,7 +1213,12 @@ static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign, static bool virtio_pci_query_guest_notifiers(DeviceState *d) { VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); - return msix_enabled(&proxy->pci_dev); + + if (msix_enabled(&proxy->pci_dev)) { + return true; + } else { + return pci_irq_disabled(&proxy->pci_dev); + } } static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign) @@ -1963,6 +1965,7 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) uint8_t *config; uint32_t size; VirtIODevice *vdev = virtio_bus_get_device(bus); + int16_t res; /* * Virtio capabilities present without @@ -2058,6 +2061,8 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) if (modern_pio) { memory_region_init(&proxy->io_bar, OBJECT(proxy), "virtio-pci-io", 0x4); + address_space_init(&proxy->modern_cfg_io_as, &proxy->io_bar, + "virtio-pci-cfg-io-as"); pci_register_bar(&proxy->pci_dev, proxy->modern_io_bar_idx, PCI_BASE_ADDRESS_SPACE_IO, &proxy->io_bar); @@ -2108,6 +2113,18 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp) pci_register_bar(&proxy->pci_dev, proxy->legacy_io_bar_idx, PCI_BASE_ADDRESS_SPACE_IO, &proxy->bar); } + + if (pci_is_vf(&proxy->pci_dev)) { + pcie_ari_init(&proxy->pci_dev, proxy->last_pcie_cap_offset); + proxy->last_pcie_cap_offset += PCI_ARI_SIZEOF; + } else { + res = pcie_sriov_pf_init_from_user_created_vfs( + &proxy->pci_dev, proxy->last_pcie_cap_offset, errp); + if (res > 0) { + proxy->last_pcie_cap_offset += res; + virtio_add_feature(&vdev->host_features, VIRTIO_F_SR_IOV); + } + } } static void virtio_pci_device_unplugged(DeviceState *d) @@ -2181,6 +2198,9 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) /* PCI BAR regions must be powers of 2 */ pow2ceil(proxy->notify.offset + proxy->notify.size)); + address_space_init(&proxy->modern_cfg_mem_as, &proxy->modern_bar, + "virtio-pci-cfg-mem-as"); + if (proxy->disable_legacy == ON_OFF_AUTO_AUTO) { proxy->disable_legacy = pcie_port ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF; } @@ -2195,19 +2215,16 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) if (pcie_port && pci_is_express(pci_dev)) { int pos; - uint16_t last_pcie_cap_offset = PCI_CONFIG_SPACE_SIZE; + proxy->last_pcie_cap_offset = PCI_CONFIG_SPACE_SIZE; pos = pcie_endpoint_cap_init(pci_dev, 0); assert(pos > 0); - pos = pci_add_capability(pci_dev, PCI_CAP_ID_PM, 0, - PCI_PM_SIZEOF, errp); + pos = pci_pm_init(pci_dev, 0, errp); if (pos < 0) { return; } - pci_dev->exp.pm_cap = pos; - /* * Indicates that this function complies with revision 1.2 of the * PCI Power Management Interface Specification. 
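/*
 * Illustration (not part of the patch): the No_Soft_Reset support added
 * in this series hinges on two PMCSR fields.  PCI_PM_CTRL_STATE_MASK
 * (bits 1:0) holds the power state, where 3 means D3hot, and
 * PCI_PM_CTRL_NO_SOFT_RESET (bit 3) advertises that a D3hot->D0
 * transition preserves device context.  A restated predicate matching
 * the virtio_pci_no_soft_reset() check further below:
 */
static bool pmcsr_skips_reset(uint16_t pmcsr)
{
    return (pmcsr & PCI_PM_CTRL_NO_SOFT_RESET) &&
           (pmcsr & PCI_PM_CTRL_STATE_MASK) == 3 /* D3hot */;
}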
@@ -2215,9 +2232,9 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) pci_set_word(pci_dev->config + pos + PCI_PM_PMC, 0x3); if (proxy->flags & VIRTIO_PCI_FLAG_AER) { - pcie_aer_init(pci_dev, PCI_ERR_VER, last_pcie_cap_offset, + pcie_aer_init(pci_dev, PCI_ERR_VER, proxy->last_pcie_cap_offset, PCI_ERR_SIZEOF, NULL); - last_pcie_cap_offset += PCI_ERR_SIZEOF; + proxy->last_pcie_cap_offset += PCI_ERR_SIZEOF; } if (proxy->flags & VIRTIO_PCI_FLAG_INIT_DEVERR) { @@ -2230,6 +2247,11 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) pcie_cap_lnkctl_init(pci_dev); } + if (proxy->flags & VIRTIO_PCI_FLAG_PM_NO_SOFT_RESET) { + pci_set_word(pci_dev->config + pos + PCI_PM_CTRL, + PCI_PM_CTRL_NO_SOFT_RESET); + } + if (proxy->flags & VIRTIO_PCI_FLAG_INIT_PM) { /* Init Power Management Control Register */ pci_set_word(pci_dev->wmask + pos + PCI_PM_CTRL, @@ -2237,9 +2259,9 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) } if (proxy->flags & VIRTIO_PCI_FLAG_ATS) { - pcie_ats_init(pci_dev, last_pcie_cap_offset, + pcie_ats_init(pci_dev, proxy->last_pcie_cap_offset, proxy->flags & VIRTIO_PCI_FLAG_ATS_PAGE_ALIGNED); - last_pcie_cap_offset += PCI_EXT_CAP_ATS_SIZEOF; + proxy->last_pcie_cap_offset += PCI_EXT_CAP_ATS_SIZEOF; } if (proxy->flags & VIRTIO_PCI_FLAG_INIT_FLR) { @@ -2265,12 +2287,18 @@ static void virtio_pci_exit(PCIDevice *pci_dev) VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev); bool pcie_port = pci_bus_is_express(pci_get_bus(pci_dev)) && !pci_bus_is_root(pci_get_bus(pci_dev)); + bool modern_pio = proxy->flags & VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY; + pcie_sriov_pf_exit(&proxy->pci_dev); msix_uninit_exclusive_bar(pci_dev); if (proxy->flags & VIRTIO_PCI_FLAG_AER && pcie_port && pci_is_express(pci_dev)) { pcie_aer_exit(pci_dev); } + address_space_destroy(&proxy->modern_cfg_mem_as); + if (modern_pio) { + address_space_destroy(&proxy->modern_cfg_io_as); + } } static void virtio_pci_reset(DeviceState *qdev) @@ -2292,30 +2320,54 @@ static void virtio_pci_reset(DeviceState *qdev) } } +static bool virtio_pci_no_soft_reset(PCIDevice *dev) +{ + uint16_t pmcsr; + + if (!pci_is_express(dev) || !(dev->cap_present & QEMU_PCI_CAP_PM)) { + return false; + } + + pmcsr = pci_get_word(dev->config + dev->pm_cap + PCI_PM_CTRL); + + /* + * When No_Soft_Reset bit is set and the device + * is in D3hot state, don't reset device + */ + return (pmcsr & PCI_PM_CTRL_NO_SOFT_RESET) && + (pmcsr & PCI_PM_CTRL_STATE_MASK) == 3; +} + static void virtio_pci_bus_reset_hold(Object *obj, ResetType type) { PCIDevice *dev = PCI_DEVICE(obj); DeviceState *qdev = DEVICE(obj); + if (virtio_pci_no_soft_reset(dev)) { + return; + } + virtio_pci_reset(qdev); if (pci_is_express(dev)) { + VirtIOPCIProxy *proxy = VIRTIO_PCI(dev); + pcie_cap_deverr_reset(dev); pcie_cap_lnkctl_reset(dev); - pci_set_word(dev->config + dev->exp.pm_cap + PCI_PM_CTRL, 0); + if (proxy->flags & VIRTIO_PCI_FLAG_INIT_PM) { + pci_word_test_and_clear_mask( + dev->config + dev->pm_cap + PCI_PM_CTRL, + PCI_PM_CTRL_STATE_MASK); + } } } -static Property virtio_pci_properties[] = { +static const Property virtio_pci_properties[] = { DEFINE_PROP_BIT("virtio-pci-bus-master-bug-migration", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_BUS_MASTER_BUG_MIGRATION_BIT, false), - DEFINE_PROP_BIT("migrate-extra", VirtIOPCIProxy, flags, - VIRTIO_PCI_FLAG_MIGRATE_EXTRA_BIT, true), DEFINE_PROP_BIT("modern-pio-notify", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_MODERN_PIO_NOTIFY_BIT, false), - DEFINE_PROP_BIT("x-disable-pcie", VirtIOPCIProxy, flags, - 
VIRTIO_PCI_FLAG_DISABLE_PCIE_BIT, false), DEFINE_PROP_BIT("page-per-vq", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_PAGE_PER_VQ_BIT, false), DEFINE_PROP_BOOL("x-ignore-backend-features", VirtIOPCIProxy, @@ -2330,11 +2382,12 @@ static Property virtio_pci_properties[] = { VIRTIO_PCI_FLAG_INIT_LNKCTL_BIT, true), DEFINE_PROP_BIT("x-pcie-pm-init", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_INIT_PM_BIT, true), + DEFINE_PROP_BIT("x-pcie-pm-no-soft-reset", VirtIOPCIProxy, flags, + VIRTIO_PCI_FLAG_PM_NO_SOFT_RESET_BIT, false), DEFINE_PROP_BIT("x-pcie-flr-init", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_INIT_FLR_BIT, true), DEFINE_PROP_BIT("aer", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_AER_BIT, false), - DEFINE_PROP_END_OF_LIST(), }; static void virtio_pci_dc_realize(DeviceState *qdev, Error **errp) @@ -2343,15 +2396,22 @@ static void virtio_pci_dc_realize(DeviceState *qdev, Error **errp) VirtIOPCIProxy *proxy = VIRTIO_PCI(qdev); PCIDevice *pci_dev = &proxy->pci_dev; - if (!(proxy->flags & VIRTIO_PCI_FLAG_DISABLE_PCIE) && - virtio_pci_modern(proxy)) { + if (virtio_pci_modern(proxy)) { pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS; } vpciklass->parent_dc_realize(qdev, errp); } -static void virtio_pci_class_init(ObjectClass *klass, void *data) +static int virtio_pci_sync_config(DeviceState *dev, Error **errp) +{ + VirtIOPCIProxy *proxy = VIRTIO_PCI(dev); + VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + + return qdev_sync_config(DEVICE(vdev), errp); +} + +static void virtio_pci_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); @@ -2367,6 +2427,7 @@ static void virtio_pci_class_init(ObjectClass *klass, void *data) device_class_set_parent_realize(dc, virtio_pci_dc_realize, &vpciklass->parent_dc_realize); rc->phases.hold = virtio_pci_bus_reset_hold; + dc->sync_config = virtio_pci_sync_config; } static const TypeInfo virtio_pci_info = { @@ -2378,14 +2439,13 @@ static const TypeInfo virtio_pci_info = { .abstract = true, }; -static Property virtio_pci_generic_properties[] = { +static const Property virtio_pci_generic_properties[] = { DEFINE_PROP_ON_OFF_AUTO("disable-legacy", VirtIOPCIProxy, disable_legacy, ON_OFF_AUTO_AUTO), DEFINE_PROP_BOOL("disable-modern", VirtIOPCIProxy, disable_modern, false), - DEFINE_PROP_END_OF_LIST(), }; -static void virtio_pci_base_class_init(ObjectClass *klass, void *data) +static void virtio_pci_base_class_init(ObjectClass *klass, const void *data) { const VirtioPCIDeviceTypeInfo *t = data; if (t->class_init) { @@ -2393,7 +2453,7 @@ static void virtio_pci_base_class_init(ObjectClass *klass, void *data) } } -static void virtio_pci_generic_class_init(ObjectClass *klass, void *data) +static void virtio_pci_generic_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -2433,7 +2493,7 @@ void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t) .name = t->generic_name, .parent = base_type_info.name, .class_init = virtio_pci_generic_class_init, - .interfaces = (InterfaceInfo[]) { + .interfaces = (const InterfaceInfo[]) { { INTERFACE_PCIE_DEVICE }, { INTERFACE_CONVENTIONAL_PCI_DEVICE }, { } @@ -2449,18 +2509,18 @@ void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t) generic_type_info.parent = base_name; generic_type_info.class_init = virtio_pci_base_class_init; - generic_type_info.class_data = (void *)t; + generic_type_info.class_data = t; assert(!t->non_transitional_name); assert(!t->transitional_name); } else { base_type_info.class_init = 
virtio_pci_base_class_init; - base_type_info.class_data = (void *)t; + base_type_info.class_data = t; } - type_register(&base_type_info); + type_register_static(&base_type_info); if (generic_type_info.name) { - type_register(&generic_type_info); + type_register_static(&generic_type_info); } if (t->non_transitional_name) { @@ -2468,13 +2528,13 @@ void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t) .name = t->non_transitional_name, .parent = base_type_info.name, .instance_init = virtio_pci_non_transitional_instance_init, - .interfaces = (InterfaceInfo[]) { + .interfaces = (const InterfaceInfo[]) { { INTERFACE_PCIE_DEVICE }, { INTERFACE_CONVENTIONAL_PCI_DEVICE }, { } }, }; - type_register(&non_transitional_type_info); + type_register_static(&non_transitional_type_info); } if (t->transitional_name) { @@ -2482,7 +2542,7 @@ void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t) .name = t->transitional_name, .parent = base_type_info.name, .instance_init = virtio_pci_transitional_instance_init, - .interfaces = (InterfaceInfo[]) { + .interfaces = (const InterfaceInfo[]) { /* * Transitional virtio devices work only as Conventional PCI * devices because they require PIO ports. @@ -2491,7 +2551,7 @@ void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t) { } }, }; - type_register(&transitional_type_info); + type_register_static(&transitional_type_info); } g_free(base_name); } @@ -2538,7 +2598,7 @@ static void virtio_pci_bus_new(VirtioBusState *bus, size_t bus_size, qbus_init(bus, bus_size, TYPE_VIRTIO_PCI_BUS, qdev, virtio_bus_name); } -static void virtio_pci_bus_class_init(ObjectClass *klass, void *data) +static void virtio_pci_bus_class_init(ObjectClass *klass, const void *data) { BusClass *bus_class = BUS_CLASS(klass); VirtioBusClass *k = VIRTIO_BUS_CLASS(klass); diff --git a/hw/virtio/virtio-pmem-pci.c b/hw/virtio/virtio-pmem-pci.c index cfe7f3b..babd91c 100644 --- a/hw/virtio/virtio-pmem-pci.c +++ b/hw/virtio/virtio-pmem-pci.c @@ -80,7 +80,7 @@ static void virtio_pmem_pci_fill_device_info(const MemoryDeviceState *md, info->type = MEMORY_DEVICE_INFO_KIND_VIRTIO_PMEM; } -static void virtio_pmem_pci_class_init(ObjectClass *klass, void *data) +static void virtio_pmem_pci_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); diff --git a/hw/virtio/virtio-pmem.c b/hw/virtio/virtio-pmem.c index c3512c2..3416ea1 100644 --- a/hw/virtio/virtio-pmem.c +++ b/hw/virtio/virtio-pmem.c @@ -21,7 +21,7 @@ #include "hw/virtio/virtio-access.h" #include "standard-headers/linux/virtio_ids.h" #include "standard-headers/linux/virtio_pmem.h" -#include "sysemu/hostmem.h" +#include "system/hostmem.h" #include "block/aio.h" #include "block/thread-pool.h" #include "trace.h" @@ -155,14 +155,13 @@ static MemoryRegion *virtio_pmem_get_memory_region(VirtIOPMEM *pmem, return &pmem->memdev->mr; } -static Property virtio_pmem_properties[] = { +static const Property virtio_pmem_properties[] = { DEFINE_PROP_UINT64(VIRTIO_PMEM_ADDR_PROP, VirtIOPMEM, start, 0), DEFINE_PROP_LINK(VIRTIO_PMEM_MEMDEV_PROP, VirtIOPMEM, memdev, TYPE_MEMORY_BACKEND, HostMemoryBackend *), - DEFINE_PROP_END_OF_LIST(), }; -static void virtio_pmem_class_init(ObjectClass *klass, void *data) +static void virtio_pmem_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); diff --git a/hw/virtio/virtio-qmp.c b/hw/virtio/virtio-qmp.c index 1dd96ed..3b6377c 100644 --- 
a/hw/virtio/virtio-qmp.c
+++ b/hw/virtio/virtio-qmp.c
@@ -15,8 +15,8 @@
 #include "qapi/error.h"
 #include "qapi/qapi-commands-virtio.h"
 #include "qapi/qapi-commands-qom.h"
-#include "qapi/qmp/qobject.h"
-#include "qapi/qmp/qjson.h"
+#include "qobject/qobject.h"
+#include "qobject/qjson.h"
 #include "hw/virtio/vhost-user.h"
 #include "standard-headers/linux/virtio_ids.h"
@@ -121,6 +121,12 @@ static const qmp_virtio_feature_map_t vhost_user_protocol_map[] = {
     FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_STATUS, \
             "VHOST_USER_PROTOCOL_F_STATUS: Querying and notifying back-end "
             "device status supported"),
+    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_SHARED_OBJECT, \
+            "VHOST_USER_PROTOCOL_F_SHARED_OBJECT: Backend shared object "
+            "supported"),
+    FEATURE_ENTRY(VHOST_USER_PROTOCOL_F_DEVICE_STATE, \
+            "VHOST_USER_PROTOCOL_F_DEVICE_STATE: Backend device state transfer "
+            "supported"),
     { -1, "" }
 };
@@ -450,6 +456,9 @@ static const qmp_virtio_feature_map_t virtio_mem_feature_map[] = {
     FEATURE_ENTRY(VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE, \
             "VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE: Unplugged memory cannot be "
             "accessed"),
+    FEATURE_ENTRY(VIRTIO_MEM_F_PERSISTENT_SUSPEND, \
+            "VIRTIO_MEM_F_PERSISTENT_SUSPEND: Plugged memory will remain "
+            "plugged when suspending+resuming"),
     { -1, "" }
 };
 #endif
diff --git a/hw/virtio/virtio-rng-pci.c b/hw/virtio/virtio-rng-pci.c
index 6e76f8b..39b6003 100644
--- a/hw/virtio/virtio-rng-pci.c
+++ b/hw/virtio/virtio-rng-pci.c
@@ -32,12 +32,11 @@ struct VirtIORngPCI {
     VirtIORNG vdev;
 };
-static Property virtio_rng_properties[] = {
+static const Property virtio_rng_properties[] = {
     DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags,
                     VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true),
     DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors,
                        DEV_NVECTORS_UNSPECIFIED),
-    DEFINE_PROP_END_OF_LIST(),
 };
 static void virtio_rng_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
@@ -54,7 +53,7 @@ static void virtio_rng_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
     }
 }
-static void virtio_rng_pci_class_init(ObjectClass *klass, void *data)
+static void virtio_rng_pci_class_init(ObjectClass *klass, const void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
     VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass);
diff --git a/hw/virtio/virtio-rng.c b/hw/virtio/virtio-rng.c
index f74efff..3df5d25 100644
--- a/hw/virtio/virtio-rng.c
+++ b/hw/virtio/virtio-rng.c
@@ -17,8 +17,8 @@
 #include "hw/virtio/virtio.h"
 #include "hw/qdev-properties.h"
 #include "hw/virtio/virtio-rng.h"
-#include "sysemu/rng.h"
-#include "sysemu/runstate.h"
+#include "system/rng.h"
+#include "system/runstate.h"
 #include "qom/object_interfaces.h"
 #include "trace.h"
@@ -159,17 +159,18 @@ static void check_rate_limit(void *opaque)
     vrng->activate_timer = true;
 }
-static void virtio_rng_set_status(VirtIODevice *vdev, uint8_t status)
+static int virtio_rng_set_status(VirtIODevice *vdev, uint8_t status)
 {
     VirtIORNG *vrng = VIRTIO_RNG(vdev);
     if (!vdev->vm_running) {
-        return;
+        return 0;
     }
     vdev->status = status;
     /* Something changed, try to process buffers */
     virtio_rng_process(vrng);
+    return 0;
 }
 static void virtio_rng_device_realize(DeviceState *dev, Error **errp)
@@ -184,8 +185,9 @@ static void virtio_rng_device_realize(DeviceState *dev, Error **errp)
     /* Workaround: Property parsing does not enforce unsigned integers,
      * So this is a hack to reject such numbers.
*/ - if (vrng->conf.max_bytes > INT64_MAX) { - error_setg(errp, "'max-bytes' parameter must be non-negative, " + if (vrng->conf.max_bytes == 0 || + vrng->conf.max_bytes > INT64_MAX) { + error_setg(errp, "'max-bytes' parameter must be positive, " "and less than 2^63"); return; } @@ -248,7 +250,7 @@ static const VMStateDescription vmstate_virtio_rng = { }, }; -static Property virtio_rng_properties[] = { +static const Property virtio_rng_properties[] = { /* Set a default rate limit of 2^47 bytes per minute or roughly 2TB/s. If * you have an entropy source capable of generating more entropy than this * and you can pass it through via virtio-rng, then hats off to you. Until @@ -257,10 +259,9 @@ static Property virtio_rng_properties[] = { DEFINE_PROP_UINT64("max-bytes", VirtIORNG, conf.max_bytes, INT64_MAX), DEFINE_PROP_UINT32("period", VirtIORNG, conf.period_ms, 1 << 16), DEFINE_PROP_LINK("rng", VirtIORNG, conf.rng, TYPE_RNG_BACKEND, RngBackend *), - DEFINE_PROP_END_OF_LIST(), }; -static void virtio_rng_class_init(ObjectClass *klass, void *data) +static void virtio_rng_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); diff --git a/hw/virtio/virtio-scsi-pci.c b/hw/virtio/virtio-scsi-pci.c index e8e3442..af87759 100644 --- a/hw/virtio/virtio-scsi-pci.c +++ b/hw/virtio/virtio-scsi-pci.c @@ -35,12 +35,11 @@ struct VirtIOSCSIPCI { VirtIOSCSI vdev; }; -static Property virtio_scsi_pci_properties[] = { +static const Property virtio_scsi_pci_properties[] = { DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true), DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, DEV_NVECTORS_UNSPECIFIED), - DEFINE_PROP_END_OF_LIST(), }; static void virtio_scsi_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) @@ -73,7 +72,7 @@ static void virtio_scsi_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) qdev_realize(vdev, BUS(&vpci_dev->bus), errp); } -static void virtio_scsi_pci_class_init(ObjectClass *klass, void *data) +static void virtio_scsi_pci_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); diff --git a/hw/virtio/virtio-serial-pci.c b/hw/virtio/virtio-serial-pci.c index cea31ad..3f212ff 100644 --- a/hw/virtio/virtio-serial-pci.c +++ b/hw/virtio/virtio-serial-pci.c @@ -69,15 +69,14 @@ static void virtio_serial_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) qdev_realize(vdev, BUS(&vpci_dev->bus), errp); } -static Property virtio_serial_pci_properties[] = { +static const Property virtio_serial_pci_properties[] = { DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true), DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2), DEFINE_PROP_UINT32("class", VirtIOPCIProxy, class_code, 0), - DEFINE_PROP_END_OF_LIST(), }; -static void virtio_serial_pci_class_init(ObjectClass *klass, void *data) +static void virtio_serial_pci_class_init(ObjectClass *klass, const void *data) { DeviceClass *dc = DEVICE_CLASS(klass); VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 893a072..82a285a 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -30,8 +30,8 @@ #include "hw/virtio/virtio-bus.h" #include "hw/qdev-properties.h" #include "hw/virtio/virtio-access.h" -#include "sysemu/dma.h" -#include "sysemu/runstate.h" +#include "system/dma.h" +#include "system/runstate.h" #include "virtio-qmp.h" #include 
"standard-headers/linux/virtio_ids.h" @@ -205,6 +205,15 @@ static const char *virtio_id_to_name(uint16_t device_id) return name; } +static void virtio_check_indirect_feature(VirtIODevice *vdev) +{ + if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) { + qemu_log_mask(LOG_GUEST_ERROR, + "Device %s: indirect_desc was not negotiated!\n", + vdev->name); + } +} + /* Called within call_rcu(). */ static void virtio_free_region_cache(VRingMemoryRegionCaches *caches) { @@ -323,7 +332,6 @@ static void vring_packed_event_read(VirtIODevice *vdev, /* Make sure flags is seen before off_wrap */ smp_rmb(); e->off_wrap = virtio_lduw_phys_cached(vdev, cache, off_off); - virtio_tswap16s(vdev, &e->flags); } static void vring_packed_off_wrap_write(VirtIODevice *vdev, @@ -745,6 +753,60 @@ int virtio_queue_empty(VirtQueue *vq) } } +static bool virtio_queue_split_poll(VirtQueue *vq, unsigned shadow_idx) +{ + if (unlikely(!vq->vring.avail)) { + return false; + } + + return (uint16_t)shadow_idx != vring_avail_idx(vq); +} + +static bool virtio_queue_packed_poll(VirtQueue *vq, unsigned shadow_idx) +{ + VRingPackedDesc desc; + VRingMemoryRegionCaches *caches; + + if (unlikely(!vq->vring.desc)) { + return false; + } + + caches = vring_get_region_caches(vq); + if (!caches) { + return false; + } + + vring_packed_desc_read(vq->vdev, &desc, &caches->desc, + shadow_idx, true); + + return is_desc_avail(desc.flags, vq->shadow_avail_wrap_counter); +} + +static bool virtio_queue_poll(VirtQueue *vq, unsigned shadow_idx) +{ + if (virtio_device_disabled(vq->vdev)) { + return false; + } + + if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { + return virtio_queue_packed_poll(vq, shadow_idx); + } else { + return virtio_queue_split_poll(vq, shadow_idx); + } +} + +bool virtio_queue_enable_notification_and_check(VirtQueue *vq, + int opaque) +{ + virtio_queue_set_notification(vq, 1); + + if (opaque >= 0) { + return virtio_queue_poll(vq, (unsigned)opaque); + } else { + return false; + } +} + static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem, unsigned int len) { @@ -873,6 +935,46 @@ static void virtqueue_packed_fill(VirtQueue *vq, const VirtQueueElement *elem, vq->used_elems[idx].ndescs = elem->ndescs; } +static void virtqueue_ordered_fill(VirtQueue *vq, const VirtQueueElement *elem, + unsigned int len) +{ + unsigned int i, steps, max_steps; + + i = vq->used_idx % vq->vring.num; + steps = 0; + /* + * We shouldn't need to increase 'i' by more than the distance + * between used_idx and last_avail_idx. + */ + max_steps = (vq->last_avail_idx - vq->used_idx) % vq->vring.num; + + /* Search for element in vq->used_elems */ + while (steps <= max_steps) { + /* Found element, set length and mark as filled */ + if (vq->used_elems[i].index == elem->index) { + vq->used_elems[i].len = len; + vq->used_elems[i].in_order_filled = true; + break; + } + + i += vq->used_elems[i].ndescs; + steps += vq->used_elems[i].ndescs; + + if (i >= vq->vring.num) { + i -= vq->vring.num; + } + } + + /* + * We should be able to find a matching VirtQueueElement in + * used_elems. If we don't, this is an error. 
+ */ + if (steps >= max_steps) { + qemu_log_mask(LOG_GUEST_ERROR, "%s: %s cannot fill buffer id %u\n", + __func__, vq->vdev->name, elem->index); + } +} + static void virtqueue_packed_fill_desc(VirtQueue *vq, const VirtQueueElement *elem, unsigned int idx, @@ -923,7 +1025,9 @@ void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem, return; } - if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { + if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_IN_ORDER)) { + virtqueue_ordered_fill(vq, elem, len); + } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { virtqueue_packed_fill(vq, elem, len, idx); } else { virtqueue_split_fill(vq, elem, len, idx); @@ -982,6 +1086,73 @@ static void virtqueue_packed_flush(VirtQueue *vq, unsigned int count) } } +static void virtqueue_ordered_flush(VirtQueue *vq) +{ + unsigned int i = vq->used_idx % vq->vring.num; + unsigned int ndescs = 0; + uint16_t old = vq->used_idx; + uint16_t new; + bool packed; + VRingUsedElem uelem; + + packed = virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED); + + if (packed) { + if (unlikely(!vq->vring.desc)) { + return; + } + } else if (unlikely(!vq->vring.used)) { + return; + } + + /* First expected in-order element isn't ready, nothing to do */ + if (!vq->used_elems[i].in_order_filled) { + return; + } + + /* Search for filled elements in-order */ + while (vq->used_elems[i].in_order_filled) { + /* + * First entry for packed VQs is written last so the guest + * doesn't see invalid descriptors. + */ + if (packed && i != vq->used_idx) { + virtqueue_packed_fill_desc(vq, &vq->used_elems[i], ndescs, false); + } else if (!packed) { + uelem.id = vq->used_elems[i].index; + uelem.len = vq->used_elems[i].len; + vring_used_write(vq, &uelem, i); + } + + vq->used_elems[i].in_order_filled = false; + ndescs += vq->used_elems[i].ndescs; + i += vq->used_elems[i].ndescs; + if (i >= vq->vring.num) { + i -= vq->vring.num; + } + } + + if (packed) { + virtqueue_packed_fill_desc(vq, &vq->used_elems[vq->used_idx], 0, true); + vq->used_idx += ndescs; + if (vq->used_idx >= vq->vring.num) { + vq->used_idx -= vq->vring.num; + vq->used_wrap_counter ^= 1; + vq->signalled_used_valid = false; + } + } else { + /* Make sure buffer is written before we update index. 
*/ + smp_wmb(); + new = old + ndescs; + vring_used_idx_set(vq, new); + if (unlikely((int16_t)(new - vq->signalled_used) < + (uint16_t)(new - old))) { + vq->signalled_used_valid = false; + } + } + vq->inuse -= ndescs; +} + void virtqueue_flush(VirtQueue *vq, unsigned int count) { if (virtio_device_disabled(vq->vdev)) { @@ -989,7 +1160,9 @@ void virtqueue_flush(VirtQueue *vq, unsigned int count) return; } - if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { + if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_IN_ORDER)) { + virtqueue_ordered_flush(vq); + } else if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { virtqueue_packed_flush(vq, count); } else { virtqueue_split_flush(vq, count); @@ -1332,9 +1505,9 @@ err: goto done; } -void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, - unsigned int *out_bytes, - unsigned max_in_bytes, unsigned max_out_bytes) +int virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, + unsigned int *out_bytes, unsigned max_in_bytes, + unsigned max_out_bytes) { uint16_t desc_size; VRingMemoryRegionCaches *caches; @@ -1367,7 +1540,7 @@ void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes, caches); } - return; + return (int)vq->shadow_avail_idx; err: if (in_bytes) { *in_bytes = 0; @@ -1375,6 +1548,8 @@ err: if (out_bytes) { *out_bytes = 0; } + + return -1; } int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes, @@ -1506,7 +1681,7 @@ static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_nu static void *virtqueue_split_pop(VirtQueue *vq, size_t sz) { - unsigned int i, head, max; + unsigned int i, head, max, idx; VRingMemoryRegionCaches *caches; MemoryRegionCache indirect_desc_cache; MemoryRegionCache *desc_cache; @@ -1514,8 +1689,8 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz) VirtIODevice *vdev = vq->vdev; VirtQueueElement *elem = NULL; unsigned out_num, in_num, elem_entries; - hwaddr addr[VIRTQUEUE_MAX_SIZE]; - struct iovec iov[VIRTQUEUE_MAX_SIZE]; + hwaddr QEMU_UNINITIALIZED addr[VIRTQUEUE_MAX_SIZE]; + struct iovec QEMU_UNINITIALIZED iov[VIRTQUEUE_MAX_SIZE]; VRingDesc desc; int rc; @@ -1567,6 +1742,7 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz) virtio_error(vdev, "Invalid size for indirect buffer table"); goto done; } + virtio_check_indirect_feature(vdev); /* loop over the indirect descriptor table */ len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as, @@ -1630,6 +1806,13 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz) elem->in_sg[i] = iov[out_num + i]; } + if (virtio_vdev_has_feature(vdev, VIRTIO_F_IN_ORDER)) { + idx = (vq->last_avail_idx - 1) % vq->vring.num; + vq->used_elems[idx].index = elem->index; + vq->used_elems[idx].len = elem->len; + vq->used_elems[idx].ndescs = elem->ndescs; + } + vq->inuse++; trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num); @@ -1653,8 +1836,8 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz) VirtIODevice *vdev = vq->vdev; VirtQueueElement *elem = NULL; unsigned out_num, in_num, elem_entries; - hwaddr addr[VIRTQUEUE_MAX_SIZE]; - struct iovec iov[VIRTQUEUE_MAX_SIZE]; + hwaddr QEMU_UNINITIALIZED addr[VIRTQUEUE_MAX_SIZE]; + struct iovec QEMU_UNINITIALIZED iov[VIRTQUEUE_MAX_SIZE]; VRingPackedDesc desc; uint16_t id; int rc; @@ -1697,6 +1880,7 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz) virtio_error(vdev, "Invalid size for indirect buffer table"); goto done; } + virtio_check_indirect_feature(vdev); /* loop over the indirect descriptor table */ 
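/*
 * Illustration (not part of the patch): with VIRTIO_F_IN_ORDER, the pop
 * paths above record every element in vq->used_elems at its ring
 * position, virtqueue_ordered_fill() merely marks completions, and
 * virtqueue_ordered_flush() publishes used entries strictly in pop
 * order, buffering any completion that finishes early.  Hypothetical
 * device code completing two requests in reverse pop order:
 */
static void complete_out_of_order(VirtQueue *vq,
                                  VirtQueueElement *first,
                                  VirtQueueElement *second,
                                  unsigned int len1, unsigned int len2)
{
    /* The 'idx' argument is ignored on the in-order path. */
    virtqueue_fill(vq, second, len2, 0); /* buffered, not yet visible */
    virtqueue_fill(vq, first, len1, 0);  /* head is ready: both can go */
    virtqueue_flush(vq, 2);              /* guest sees them in pop order */
}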
len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as, @@ -1745,6 +1929,11 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz) &indirect_desc_cache); } while (rc == VIRTQUEUE_READ_DESC_MORE); + if (desc_cache != &indirect_desc_cache) { + /* Buffer ID is included in the last descriptor in the list. */ + id = desc.id; + } + /* Now copy what we have collected and mapped */ elem = virtqueue_alloc_element(sz, out_num, in_num); for (i = 0; i < out_num; i++) { @@ -1758,6 +1947,13 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz) elem->index = id; elem->ndescs = (desc_cache == &indirect_desc_cache) ? 1 : elem_entries; + + if (virtio_vdev_has_feature(vdev, VIRTIO_F_IN_ORDER)) { + vq->used_elems[vq->last_avail_idx].index = elem->index; + vq->used_elems[vq->last_avail_idx].len = elem->len; + vq->used_elems[vq->last_avail_idx].ndescs = elem->ndescs; + } + vq->last_avail_idx += elem->ndescs; vq->inuse += elem->ndescs; @@ -2036,12 +2232,12 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val) { VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); trace_virtio_set_status(vdev, val); + int ret = 0; if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) && val & VIRTIO_CONFIG_S_FEATURES_OK) { - int ret = virtio_validate_features(vdev); - + ret = virtio_validate_features(vdev); if (ret) { return ret; } @@ -2054,16 +2250,20 @@ int virtio_set_status(VirtIODevice *vdev, uint8_t val) } if (k->set_status) { - k->set_status(vdev, val); + ret = k->set_status(vdev, val); + if (ret) { + qemu_log("set %s status to %d failed, old status: %d\n", + vdev->name, val, vdev->status); + } } vdev->status = val; - return 0; + return ret; } static enum virtio_device_endian virtio_default_endian(void) { - if (target_words_bigendian()) { + if (target_big_endian()) { return VIRTIO_DEVICE_ENDIAN_BIG; } else { return VIRTIO_DEVICE_ENDIAN_LITTLE; @@ -2131,45 +2331,6 @@ void virtio_queue_enable(VirtIODevice *vdev, uint32_t queue_index) } } -void virtio_reset(void *opaque) -{ - VirtIODevice *vdev = opaque; - VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); - int i; - - virtio_set_status(vdev, 0); - if (current_cpu) { - /* Guest initiated reset */ - vdev->device_endian = virtio_current_cpu_endian(); - } else { - /* System reset */ - vdev->device_endian = virtio_default_endian(); - } - - if (vdev->vhost_started && k->get_vhost) { - vhost_reset_device(k->get_vhost(vdev)); - } - - if (k->reset) { - k->reset(vdev); - } - - vdev->start_on_kick = false; - vdev->started = false; - vdev->broken = false; - vdev->guest_features = 0; - vdev->queue_sel = 0; - vdev->status = 0; - vdev->disabled = false; - qatomic_set(&vdev->isr, 0); - vdev->config_vector = VIRTIO_NO_VECTOR; - virtio_notify_vector(vdev, vdev->config_vector); - - for(i = 0; i < VIRTIO_QUEUE_MAX; i++) { - __virtio_queue_reset(vdev, i); - } -} - void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr) { if (!vdev->vq[n].vring.num) { @@ -2264,6 +2425,24 @@ void virtio_queue_set_align(VirtIODevice *vdev, int n, int align) } } +void virtio_queue_set_shadow_avail_idx(VirtQueue *vq, uint16_t shadow_avail_idx) +{ + if (!vq->vring.desc) { + return; + } + + /* + * 16-bit data for packed VQs include 1-bit wrap counter and + * 15-bit shadow_avail_idx. 
+ */ + if (virtio_vdev_has_feature(vq->vdev, VIRTIO_F_RING_PACKED)) { + vq->shadow_avail_wrap_counter = (shadow_avail_idx >> 15) & 0x1; + vq->shadow_avail_idx = shadow_avail_idx & 0x7FFF; + } else { + vq->shadow_avail_idx = shadow_avail_idx; + } +} + static void virtio_queue_notify_vq(VirtQueue *vq) { if (vq->vring.desc && vq->handle_output) { @@ -2962,6 +3141,63 @@ int virtio_set_features(VirtIODevice *vdev, uint64_t val) return ret; } +void virtio_reset(void *opaque) +{ + VirtIODevice *vdev = opaque; + VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); + int i; + + virtio_set_status(vdev, 0); + if (current_cpu) { + /* Guest initiated reset */ + vdev->device_endian = virtio_current_cpu_endian(); + } else { + /* System reset */ + vdev->device_endian = virtio_default_endian(); + } + + if (k->get_vhost) { + struct vhost_dev *hdev = k->get_vhost(vdev); + /* Only reset when vhost back-end is connected */ + if (hdev && hdev->vhost_ops) { + vhost_reset_device(hdev); + } + } + + if (k->reset) { + k->reset(vdev); + } + + vdev->start_on_kick = false; + vdev->started = false; + vdev->broken = false; + virtio_set_features_nocheck(vdev, 0); + vdev->queue_sel = 0; + vdev->status = 0; + vdev->disabled = false; + qatomic_set(&vdev->isr, 0); + vdev->config_vector = VIRTIO_NO_VECTOR; + virtio_notify_vector(vdev, vdev->config_vector); + + for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { + __virtio_queue_reset(vdev, i); + } +} + +static void virtio_device_check_notification_compatibility(VirtIODevice *vdev, + Error **errp) +{ + VirtioBusState *bus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(bus); + DeviceState *proxy = DEVICE(BUS(bus)->parent); + + if (virtio_host_has_feature(vdev, VIRTIO_F_NOTIFICATION_DATA) && + k->ioeventfd_enabled(proxy)) { + error_setg(errp, + "notification_data=on without ioeventfd=off is not supported"); + } +} + size_t virtio_get_config_size(const VirtIOConfigSizeParams *params, uint64_t host_features) { @@ -3034,6 +3270,13 @@ virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) config_len--; } + if (vdc->pre_load_queues) { + ret = vdc->pre_load_queues(vdev); + if (ret) { + return ret; + } + } + num = qemu_get_be32(f); if (num > VIRTIO_QUEUE_MAX) { @@ -3191,7 +3434,7 @@ void virtio_cleanup(VirtIODevice *vdev) qemu_del_vm_change_state_handler(vdev->vmstate); } -static void virtio_vmstate_change(void *opaque, bool running, RunState state) +static int virtio_vmstate_change(void *opaque, bool running, RunState state) { VirtIODevice *vdev = opaque; BusState *qbus = qdev_get_parent_bus(DEVICE(vdev)); @@ -3208,8 +3451,12 @@ static void virtio_vmstate_change(void *opaque, bool running, RunState state) } if (!backend_run) { - virtio_set_status(vdev, vdev->status); + int ret = virtio_set_status(vdev, vdev->status); + if (ret) { + return ret; + } } + return 0; } void virtio_instance_init_common(Object *proxy_obj, void *data, @@ -3261,7 +3508,7 @@ void virtio_init(VirtIODevice *vdev, uint16_t device_id, size_t config_size) vdev->config = NULL; } vdev->vmstate = qdev_add_vm_change_state_handler(DEVICE(vdev), - virtio_vmstate_change, vdev); + NULL, virtio_vmstate_change, vdev); vdev->device_endian = virtio_default_endian(); vdev->use_guest_notifier_mask = true; } @@ -3420,7 +3667,6 @@ static void virtio_queue_packed_restore_last_avail_idx(VirtIODevice *vdev, int n) { /* We don't have a reference like avail idx in shared memory */ - return; } static void virtio_queue_split_restore_last_avail_idx(VirtIODevice *vdev, @@ -3445,10 +3691,9 @@ void 
virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n) static void virtio_queue_packed_update_used_idx(VirtIODevice *vdev, int n) { /* used idx was updated through set_last_avail_idx() */ - return; } -static void virtio_split_packed_update_used_idx(VirtIODevice *vdev, int n) +static void virtio_queue_split_update_used_idx(VirtIODevice *vdev, int n) { RCU_READ_LOCK_GUARD(); if (vdev->vq[n].vring.desc) { @@ -3461,7 +3706,7 @@ void virtio_queue_update_used_idx(VirtIODevice *vdev, int n) if (virtio_vdev_has_feature(vdev, VIRTIO_F_RING_PACKED)) { return virtio_queue_packed_update_used_idx(vdev, n); } else { - return virtio_split_packed_update_used_idx(vdev, n); + return virtio_queue_split_update_used_idx(vdev, n); } } @@ -3722,6 +3967,14 @@ static void virtio_device_realize(DeviceState *dev, Error **errp) } } + /* Devices should not use both ioeventfd and notification data feature */ + virtio_device_check_notification_compatibility(vdev, &err); + if (err != NULL) { + error_propagate(errp, err); + vdc->unrealize(dev); + return; + } + virtio_bus_device_plugged(vdev, &err); if (err != NULL) { error_propagate(errp, err); @@ -3776,13 +4029,12 @@ static void virtio_device_instance_finalize(Object *obj) g_free(vdev->vector_queues); } -static Property virtio_properties[] = { +static const Property virtio_properties[] = { DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features), DEFINE_PROP_BOOL("use-started", VirtIODevice, use_started, true), DEFINE_PROP_BOOL("use-disabled-flag", VirtIODevice, use_disabled_flag, true), DEFINE_PROP_BOOL("x-disable-legacy-check", VirtIODevice, disable_legacy_check, false), - DEFINE_PROP_END_OF_LIST(), }; static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev) @@ -3905,7 +4157,7 @@ void virtio_device_release_ioeventfd(VirtIODevice *vdev) virtio_bus_release_ioeventfd(vbus); } -static void virtio_device_class_init(ObjectClass *klass, void *data) +static void virtio_device_class_init(ObjectClass *klass, const void *data) { /* Set the default value here. */ VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); |
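/*
 * Illustration (not part of the patch): how two additions in this series
 * compose in a device's poll loop.  virtqueue_get_avail_bytes() now
 * returns the shadow avail index it observed (or -1 on error), and
 * virtio_queue_enable_notification_and_check() re-enables notifications
 * and reports whether further buffers arrived after that snapshot,
 * closing the disable/process/enable race.  A hypothetical handler,
 * with the byte limits disabled via UINT_MAX and request processing
 * elided:
 */
static void process_requests(VirtQueue *vq)
{
    unsigned int in_bytes, out_bytes;
    int shadow;

    do {
        virtio_queue_set_notification(vq, 0);
        /* ... virtqueue_pop() and handle all pending requests ... */
        shadow = virtqueue_get_avail_bytes(vq, &in_bytes, &out_bytes,
                                           UINT_MAX, UINT_MAX);
    } while (virtio_queue_enable_notification_and_check(vq, shadow));
}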