Diffstat (limited to 'hw/virtio/vhost.c')
-rw-r--r-- | hw/virtio/vhost.c | 213
1 file changed, 123 insertions, 90 deletions
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 4acd77e..fc43853 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -26,7 +26,7 @@
 #include "hw/mem/memory-device.h"
 #include "migration/blocker.h"
 #include "migration/qemu-file-types.h"
-#include "sysemu/dma.h"
+#include "system/dma.h"
 #include "trace.h"

 /* enabled until disconnected backend stabilizes */
@@ -43,8 +43,9 @@
     do { } while (0)
 #endif

-static struct vhost_log *vhost_log;
-static struct vhost_log *vhost_log_shm;
+static struct vhost_log *vhost_log[VHOST_BACKEND_TYPE_MAX];
+static struct vhost_log *vhost_log_shm[VHOST_BACKEND_TYPE_MAX];
+static QLIST_HEAD(, vhost_dev) vhost_log_devs[VHOST_BACKEND_TYPE_MAX];

 /* Memslots used by backends that support private memslots (without an fd). */
 static unsigned int used_memslots;
@@ -149,6 +150,47 @@ bool vhost_dev_has_iommu(struct vhost_dev *dev)
     }
 }

+static inline bool vhost_dev_should_log(struct vhost_dev *dev)
+{
+    assert(dev->vhost_ops);
+    assert(dev->vhost_ops->backend_type > VHOST_BACKEND_TYPE_NONE);
+    assert(dev->vhost_ops->backend_type < VHOST_BACKEND_TYPE_MAX);
+
+    return dev == QLIST_FIRST(&vhost_log_devs[dev->vhost_ops->backend_type]);
+}
+
+static inline void vhost_dev_elect_mem_logger(struct vhost_dev *hdev, bool add)
+{
+    VhostBackendType backend_type;
+
+    assert(hdev->vhost_ops);
+
+    backend_type = hdev->vhost_ops->backend_type;
+    assert(backend_type > VHOST_BACKEND_TYPE_NONE);
+    assert(backend_type < VHOST_BACKEND_TYPE_MAX);
+
+    if (add && !QLIST_IS_INSERTED(hdev, logdev_entry)) {
+        if (QLIST_EMPTY(&vhost_log_devs[backend_type])) {
+            QLIST_INSERT_HEAD(&vhost_log_devs[backend_type],
+                              hdev, logdev_entry);
+        } else {
+            /*
+             * The first vhost_device in the list is selected as the shared
+             * logger to scan memory sections. Put new entry next to the head
+             * to avoid inadvertent change to the underlying logger device.
+             * This is done in order to get better cache locality and to avoid
+             * performance churn on the hot path for log scanning. Even when
+             * new devices come and go quickly, it wouldn't end up changing
+             * the active leading logger device at all.
+             */
+            QLIST_INSERT_AFTER(QLIST_FIRST(&vhost_log_devs[backend_type]),
+                               hdev, logdev_entry);
+        }
+    } else if (!add && QLIST_IS_INSERTED(hdev, logdev_entry)) {
+        QLIST_REMOVE(hdev, logdev_entry);
+    }
+}
+
 static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
                                    MemoryRegionSection *section,
                                    hwaddr first,
@@ -166,12 +208,14 @@ static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
     start_addr = MAX(first, start_addr);
     end_addr = MIN(last, end_addr);

-    for (i = 0; i < dev->mem->nregions; ++i) {
-        struct vhost_memory_region *reg = dev->mem->regions + i;
-        vhost_dev_sync_region(dev, section, start_addr, end_addr,
-                              reg->guest_phys_addr,
-                              range_get_last(reg->guest_phys_addr,
-                                             reg->memory_size));
+    if (vhost_dev_should_log(dev)) {
+        for (i = 0; i < dev->mem->nregions; ++i) {
+            struct vhost_memory_region *reg = dev->mem->regions + i;
+            vhost_dev_sync_region(dev, section, start_addr, end_addr,
+                                  reg->guest_phys_addr,
+                                  range_get_last(reg->guest_phys_addr,
+                                                 reg->memory_size));
+        }
     }
     for (i = 0; i < dev->nvqs; ++i) {
         struct vhost_virtqueue *vq = dev->vqs + i;
@@ -287,6 +331,10 @@ static int vhost_set_backend_type(struct vhost_dev *dev,
         r = -1;
     }

+    if (r == 0) {
+        assert(dev->vhost_ops->backend_type == backend_type);
+    }
+
     return r;
 }

@@ -319,16 +367,22 @@ static struct vhost_log *vhost_log_alloc(uint64_t size, bool share)
     return log;
 }

-static struct vhost_log *vhost_log_get(uint64_t size, bool share)
+static struct vhost_log *vhost_log_get(VhostBackendType backend_type,
+                                       uint64_t size, bool share)
 {
-    struct vhost_log *log = share ? vhost_log_shm : vhost_log;
+    struct vhost_log *log;
+
+    assert(backend_type > VHOST_BACKEND_TYPE_NONE);
+    assert(backend_type < VHOST_BACKEND_TYPE_MAX);
+
+    log = share ? vhost_log_shm[backend_type] : vhost_log[backend_type];

     if (!log || log->size != size) {
         log = vhost_log_alloc(size, share);
         if (share) {
-            vhost_log_shm = log;
+            vhost_log_shm[backend_type] = log;
         } else {
-            vhost_log = log;
+            vhost_log[backend_type] = log;
         }
     } else {
         ++log->refcnt;
@@ -340,11 +394,20 @@ static struct vhost_log *vhost_log_get(uint64_t size, bool share)
 static void vhost_log_put(struct vhost_dev *dev, bool sync)
 {
     struct vhost_log *log = dev->log;
+    VhostBackendType backend_type;

     if (!log) {
         return;
     }

+    assert(dev->vhost_ops);
+    backend_type = dev->vhost_ops->backend_type;
+
+    if (backend_type == VHOST_BACKEND_TYPE_NONE ||
+        backend_type >= VHOST_BACKEND_TYPE_MAX) {
+        return;
+    }
+
     --log->refcnt;
     if (log->refcnt == 0) {
         /* Sync only the range covered by the old log */
@@ -352,18 +415,19 @@ static void vhost_log_put(struct vhost_dev *dev, bool sync)
             vhost_log_sync_range(dev, 0, dev->log_size * VHOST_LOG_CHUNK - 1);
         }

-        if (vhost_log == log) {
+        if (vhost_log[backend_type] == log) {
             g_free(log->log);
-            vhost_log = NULL;
-        } else if (vhost_log_shm == log) {
+            vhost_log[backend_type] = NULL;
+        } else if (vhost_log_shm[backend_type] == log) {
             qemu_memfd_free(log->log, log->size * sizeof(*(log->log)),
                             log->fd);
-            vhost_log_shm = NULL;
+            vhost_log_shm[backend_type] = NULL;
         }

         g_free(log);
     }

+    vhost_dev_elect_mem_logger(dev, false);
     dev->log = NULL;
     dev->log_size = 0;
 }
@@ -376,7 +440,8 @@ static bool vhost_dev_log_is_shared(struct vhost_dev *dev)

 static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size)
 {
-    struct vhost_log *log = vhost_log_get(size, vhost_dev_log_is_shared(dev));
+    struct vhost_log *log = vhost_log_get(dev->vhost_ops->backend_type,
+                                          size, vhost_dev_log_is_shared(dev));
     uint64_t log_base = (uintptr_t)log->log;
     int r;

@@ -667,7 +732,6 @@ out:
         memory_region_unref(old_sections[n_old_sections].mr);
     }
     g_free(old_sections);
-    return;
 }

 /* Adds the section data to the tmp_section structure.
@@ -978,6 +1042,15 @@ static int vhost_dev_set_log(struct vhost_dev *dev, bool enable_log)
             goto err_vq;
         }
     }
+
+    /*
+     * At log start we select our vhost_device logger that will scan the
+     * memory sections and skip for the others. This is possible because
+     * the log is shared amongst all vhost devices for a given type of
+     * backend.
+     */
+    vhost_dev_elect_mem_logger(dev, enable_log);
+
     return 0;
 err_vq:
     for (; i >= 0; --i) {
@@ -1294,10 +1367,10 @@ fail_alloc_desc:
     return r;
 }

-void vhost_virtqueue_stop(struct vhost_dev *dev,
-                          struct VirtIODevice *vdev,
-                          struct vhost_virtqueue *vq,
-                          unsigned idx)
+int vhost_virtqueue_stop(struct vhost_dev *dev,
+                         struct VirtIODevice *vdev,
+                         struct vhost_virtqueue *vq,
+                         unsigned idx)
 {
     int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx);
     struct vhost_vring_state state = {
@@ -1307,7 +1380,7 @@ void vhost_virtqueue_stop(struct vhost_dev *dev,

     if (virtio_queue_get_desc_addr(vdev, idx) == 0) {
         /* Don't stop the virtqueue which might have not been started */
-        return;
+        return 0;
     }

     r = dev->vhost_ops->vhost_get_vring_base(dev, &state);
@@ -1338,6 +1411,7 @@ void vhost_virtqueue_stop(struct vhost_dev *dev,
                        0, virtio_queue_get_avail_size(vdev, idx));
     vhost_memory_unmap(dev, vq->desc, virtio_queue_get_desc_size(vdev, idx),
                        0, virtio_queue_get_desc_size(vdev, idx));
+    return r;
 }

 static int vhost_virtqueue_set_busyloop_timeout(struct vhost_dev *dev,
@@ -1608,9 +1682,9 @@ void vhost_dev_cleanup(struct vhost_dev *hdev)
     memset(hdev, 0, sizeof(struct vhost_dev));
 }

-static void vhost_dev_disable_notifiers_nvqs(struct vhost_dev *hdev,
-                                             VirtIODevice *vdev,
-                                             unsigned int nvqs)
+void vhost_dev_disable_notifiers_nvqs(struct vhost_dev *hdev,
+                                      VirtIODevice *vdev,
+                                      unsigned int nvqs)
 {
     BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
     int i, r;
@@ -1856,62 +1930,6 @@ void vhost_dev_free_inflight(struct vhost_inflight *inflight)
     }
 }

-static int vhost_dev_resize_inflight(struct vhost_inflight *inflight,
-                                     uint64_t new_size)
-{
-    Error *err = NULL;
-    int fd = -1;
-    void *addr = qemu_memfd_alloc("vhost-inflight", new_size,
-                                  F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
-                                  &fd, &err);
-
-    if (err) {
-        error_report_err(err);
-        return -ENOMEM;
-    }
-
-    vhost_dev_free_inflight(inflight);
-    inflight->offset = 0;
-    inflight->addr = addr;
-    inflight->fd = fd;
-    inflight->size = new_size;
-
-    return 0;
-}
-
-void vhost_dev_save_inflight(struct vhost_inflight *inflight, QEMUFile *f)
-{
-    if (inflight->addr) {
-        qemu_put_be64(f, inflight->size);
-        qemu_put_be16(f, inflight->queue_size);
-        qemu_put_buffer(f, inflight->addr, inflight->size);
-    } else {
-        qemu_put_be64(f, 0);
-    }
-}
-
-int vhost_dev_load_inflight(struct vhost_inflight *inflight, QEMUFile *f)
-{
-    uint64_t size;
-
-    size = qemu_get_be64(f);
-    if (!size) {
-        return 0;
-    }
-
-    if (inflight->size != size) {
-        int ret = vhost_dev_resize_inflight(inflight, size);
-        if (ret < 0) {
-            return ret;
-        }
-    }
-    inflight->queue_size = qemu_get_be16(f);
-
-    qemu_get_buffer(f, inflight->addr, size);
-
-    return 0;
-}
-
 int vhost_dev_prepare_inflight(struct vhost_dev *hdev, VirtIODevice *vdev)
 {
     int r;
@@ -2044,7 +2062,8 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
         uint64_t log_base;

         hdev->log_size = vhost_get_log_size(hdev);
-        hdev->log = vhost_log_get(hdev->log_size,
+        hdev->log = vhost_log_get(hdev->vhost_ops->backend_type,
+                                  hdev->log_size,
                                   vhost_dev_log_is_shared(hdev));
         log_base = (uintptr_t)hdev->log->log;
         r = hdev->vhost_ops->vhost_set_log_base(hdev,
@@ -2054,6 +2073,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
             VHOST_OPS_DEBUG(r, "vhost_set_log_base failed");
             goto fail_log;
         }
+        vhost_dev_elect_mem_logger(hdev, true);
     }
     if (vrings) {
         r = vhost_dev_set_vring_enable(hdev, true);
@@ -2075,11 +2095,22 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
          * vhost-kernel code requires for this.*/
         for (i = 0; i < hdev->nvqs; ++i) {
             struct vhost_virtqueue *vq = hdev->vqs + i;
-            vhost_device_iotlb_miss(hdev, vq->used_phys, true);
+            r = vhost_device_iotlb_miss(hdev, vq->used_phys, true);
+            if (r) {
+                goto fail_iotlb;
+            }
         }
     }
     vhost_start_config_intr(hdev);
     return 0;
+fail_iotlb:
+    if (vhost_dev_has_iommu(hdev) &&
+        hdev->vhost_ops->vhost_set_iotlb_callback) {
+        hdev->vhost_ops->vhost_set_iotlb_callback(hdev, false);
+    }
+    if (hdev->vhost_ops->vhost_dev_start) {
+        hdev->vhost_ops->vhost_dev_start(hdev, false);
+    }
 fail_start:
     if (vrings) {
         vhost_dev_set_vring_enable(hdev, false);
@@ -2105,9 +2136,10 @@ fail_features:
 }

 /* Host notifiers must be enabled at this point. */
-void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
+int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
 {
     int i;
+    int rc = 0;

     /* should only be called after backend is connected */
     assert(hdev->vhost_ops);
@@ -2126,10 +2158,10 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
         vhost_dev_set_vring_enable(hdev, false);
     }
     for (i = 0; i < hdev->nvqs; ++i) {
-        vhost_virtqueue_stop(hdev,
-                             vdev,
-                             hdev->vqs + i,
-                             hdev->vq_index + i);
+        rc |= vhost_virtqueue_stop(hdev,
+                                   vdev,
+                                   hdev->vqs + i,
+                                   hdev->vq_index + i);
     }
     if (hdev->vhost_ops->vhost_reset_status) {
         hdev->vhost_ops->vhost_reset_status(hdev);
     }
@@ -2146,6 +2178,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
     hdev->started = false;
     vdev->vhost_started = false;
     hdev->vdev = NULL;
+    return rc;
 }

 int vhost_net_set_backend(struct vhost_dev *hdev,
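For readers who want the gist of the logging change without wading through the hunks: all vhost devices of a given backend type now share one dirty log, so only one device per backend type, the head of vhost_log_devs[], walks the memory sections in vhost_sync_dirty_bitmap(); the others sync only their own virtqueues. The snippet below is a minimal, standalone sketch of that election scheme, not the QEMU code: it uses the BSD-style LIST macros from <sys/queue.h> in place of QEMU's QLIST variants, and the names (struct dev, elect_mem_logger, should_log, on_list) are made up for illustration.

/*
 * Standalone sketch of the "elected logger" idea; illustrative only.
 * One device per backend type (the list head) does the heavy scanning.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <sys/queue.h>

enum { BACKEND_NONE, BACKEND_KERNEL, BACKEND_USER, BACKEND_MAX };

struct dev {
    const char *name;
    int backend_type;
    bool on_list;                  /* stands in for QLIST_IS_INSERTED() */
    LIST_ENTRY(dev) logdev_entry;
};

/* One list per backend type, mirroring vhost_log_devs[] in the patch. */
static LIST_HEAD(, dev) log_devs[BACKEND_MAX];

/* Only the current head of its backend's list scans memory sections. */
static bool should_log(struct dev *d)
{
    return d == LIST_FIRST(&log_devs[d->backend_type]);
}

static void elect_mem_logger(struct dev *d, bool add)
{
    assert(d->backend_type > BACKEND_NONE && d->backend_type < BACKEND_MAX);

    if (add && !d->on_list) {
        if (LIST_EMPTY(&log_devs[d->backend_type])) {
            LIST_INSERT_HEAD(&log_devs[d->backend_type], d, logdev_entry);
        } else {
            /* Insert behind the head so the active logger never changes
             * just because another device started logging. */
            LIST_INSERT_AFTER(LIST_FIRST(&log_devs[d->backend_type]),
                              d, logdev_entry);
        }
        d->on_list = true;
    } else if (!add && d->on_list) {
        LIST_REMOVE(d, logdev_entry);
        d->on_list = false;
    }
}

int main(void)
{
    struct dev a = { .name = "a", .backend_type = BACKEND_KERNEL };
    struct dev b = { .name = "b", .backend_type = BACKEND_KERNEL };

    elect_mem_logger(&a, true);
    elect_mem_logger(&b, true);
    printf("a scans: %d, b scans: %d\n", should_log(&a), should_log(&b));

    elect_mem_logger(&a, false);              /* a stops logging ... */
    printf("b scans: %d\n", should_log(&b));  /* ... b takes over */
    return 0;
}

Inserting new entries after the head rather than at it is what keeps the elected logger stable while devices come and go, which is the point of the comment added in vhost_dev_elect_mem_logger() above.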