Diffstat (limited to 'hw/virtio/vhost.c')
-rw-r--r--  hw/virtio/vhost.c | 213
1 file changed, 123 insertions(+), 90 deletions(-)
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 4acd77e..fc43853 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -26,7 +26,7 @@
#include "hw/mem/memory-device.h"
#include "migration/blocker.h"
#include "migration/qemu-file-types.h"
-#include "sysemu/dma.h"
+#include "system/dma.h"
#include "trace.h"
/* enabled until disconnected backend stabilizes */
@@ -43,8 +43,9 @@
do { } while (0)
#endif
-static struct vhost_log *vhost_log;
-static struct vhost_log *vhost_log_shm;
+static struct vhost_log *vhost_log[VHOST_BACKEND_TYPE_MAX];
+static struct vhost_log *vhost_log_shm[VHOST_BACKEND_TYPE_MAX];
+static QLIST_HEAD(, vhost_dev) vhost_log_devs[VHOST_BACKEND_TYPE_MAX];
/* Memslots used by backends that support private memslots (without an fd). */
static unsigned int used_memslots;
@@ -149,6 +150,47 @@ bool vhost_dev_has_iommu(struct vhost_dev *dev)
}
}
+static inline bool vhost_dev_should_log(struct vhost_dev *dev)
+{
+ assert(dev->vhost_ops);
+ assert(dev->vhost_ops->backend_type > VHOST_BACKEND_TYPE_NONE);
+ assert(dev->vhost_ops->backend_type < VHOST_BACKEND_TYPE_MAX);
+
+ return dev == QLIST_FIRST(&vhost_log_devs[dev->vhost_ops->backend_type]);
+}
+
+static inline void vhost_dev_elect_mem_logger(struct vhost_dev *hdev, bool add)
+{
+ VhostBackendType backend_type;
+
+ assert(hdev->vhost_ops);
+
+ backend_type = hdev->vhost_ops->backend_type;
+ assert(backend_type > VHOST_BACKEND_TYPE_NONE);
+ assert(backend_type < VHOST_BACKEND_TYPE_MAX);
+
+ if (add && !QLIST_IS_INSERTED(hdev, logdev_entry)) {
+ if (QLIST_EMPTY(&vhost_log_devs[backend_type])) {
+ QLIST_INSERT_HEAD(&vhost_log_devs[backend_type],
+ hdev, logdev_entry);
+ } else {
+ /*
+ * The first vhost_device in the list is selected as the shared
+ * logger to scan memory sections. Put new entry next to the head
+ * to avoid inadvertent change to the underlying logger device.
+ * This is done in order to get better cache locality and to avoid
+ * performance churn on the hot path for log scanning. Even when
+ * new devices come and go quickly, it wouldn't end up changing
+ * the active leading logger device at all.
+ */
+ QLIST_INSERT_AFTER(QLIST_FIRST(&vhost_log_devs[backend_type]),
+ hdev, logdev_entry);
+ }
+ } else if (!add && QLIST_IS_INSERTED(hdev, logdev_entry)) {
+ QLIST_REMOVE(hdev, logdev_entry);
+ }
+}
+
static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
MemoryRegionSection *section,
hwaddr first,
@@ -166,12 +208,14 @@ static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
start_addr = MAX(first, start_addr);
end_addr = MIN(last, end_addr);
- for (i = 0; i < dev->mem->nregions; ++i) {
- struct vhost_memory_region *reg = dev->mem->regions + i;
- vhost_dev_sync_region(dev, section, start_addr, end_addr,
- reg->guest_phys_addr,
- range_get_last(reg->guest_phys_addr,
- reg->memory_size));
+ if (vhost_dev_should_log(dev)) {
+ for (i = 0; i < dev->mem->nregions; ++i) {
+ struct vhost_memory_region *reg = dev->mem->regions + i;
+ vhost_dev_sync_region(dev, section, start_addr, end_addr,
+ reg->guest_phys_addr,
+ range_get_last(reg->guest_phys_addr,
+ reg->memory_size));
+ }
}
for (i = 0; i < dev->nvqs; ++i) {
struct vhost_virtqueue *vq = dev->vqs + i;
@@ -287,6 +331,10 @@ static int vhost_set_backend_type(struct vhost_dev *dev,
r = -1;
}
+ if (r == 0) {
+ assert(dev->vhost_ops->backend_type == backend_type);
+ }
+
return r;
}
@@ -319,16 +367,22 @@ static struct vhost_log *vhost_log_alloc(uint64_t size, bool share)
return log;
}
-static struct vhost_log *vhost_log_get(uint64_t size, bool share)
+static struct vhost_log *vhost_log_get(VhostBackendType backend_type,
+ uint64_t size, bool share)
{
- struct vhost_log *log = share ? vhost_log_shm : vhost_log;
+ struct vhost_log *log;
+
+ assert(backend_type > VHOST_BACKEND_TYPE_NONE);
+ assert(backend_type < VHOST_BACKEND_TYPE_MAX);
+
+ log = share ? vhost_log_shm[backend_type] : vhost_log[backend_type];
if (!log || log->size != size) {
log = vhost_log_alloc(size, share);
if (share) {
- vhost_log_shm = log;
+ vhost_log_shm[backend_type] = log;
} else {
- vhost_log = log;
+ vhost_log[backend_type] = log;
}
} else {
++log->refcnt;
@@ -340,11 +394,20 @@ static struct vhost_log *vhost_log_get(uint64_t size, bool share)
static void vhost_log_put(struct vhost_dev *dev, bool sync)
{
struct vhost_log *log = dev->log;
+ VhostBackendType backend_type;
if (!log) {
return;
}
+ assert(dev->vhost_ops);
+ backend_type = dev->vhost_ops->backend_type;
+
+ if (backend_type == VHOST_BACKEND_TYPE_NONE ||
+ backend_type >= VHOST_BACKEND_TYPE_MAX) {
+ return;
+ }
+
--log->refcnt;
if (log->refcnt == 0) {
/* Sync only the range covered by the old log */
@@ -352,18 +415,19 @@ static void vhost_log_put(struct vhost_dev *dev, bool sync)
vhost_log_sync_range(dev, 0, dev->log_size * VHOST_LOG_CHUNK - 1);
}
- if (vhost_log == log) {
+ if (vhost_log[backend_type] == log) {
g_free(log->log);
- vhost_log = NULL;
- } else if (vhost_log_shm == log) {
+ vhost_log[backend_type] = NULL;
+ } else if (vhost_log_shm[backend_type] == log) {
qemu_memfd_free(log->log, log->size * sizeof(*(log->log)),
log->fd);
- vhost_log_shm = NULL;
+ vhost_log_shm[backend_type] = NULL;
}
g_free(log);
}
+ vhost_dev_elect_mem_logger(dev, false);
dev->log = NULL;
dev->log_size = 0;
}
@@ -376,7 +440,8 @@ static bool vhost_dev_log_is_shared(struct vhost_dev *dev)
static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size)
{
- struct vhost_log *log = vhost_log_get(size, vhost_dev_log_is_shared(dev));
+ struct vhost_log *log = vhost_log_get(dev->vhost_ops->backend_type,
+ size, vhost_dev_log_is_shared(dev));
uint64_t log_base = (uintptr_t)log->log;
int r;
@@ -667,7 +732,6 @@ out:
memory_region_unref(old_sections[n_old_sections].mr);
}
g_free(old_sections);
- return;
}
/* Adds the section data to the tmp_section structure.
@@ -978,6 +1042,15 @@ static int vhost_dev_set_log(struct vhost_dev *dev, bool enable_log)
goto err_vq;
}
}
+
+ /*
+ * At log start we select our vhost_device logger that will scan the
+ * memory sections and skip for the others. This is possible because
+ * the log is shared amongst all vhost devices for a given type of
+ * backend.
+ */
+ vhost_dev_elect_mem_logger(dev, enable_log);
+
return 0;
err_vq:
for (; i >= 0; --i) {
@@ -1294,10 +1367,10 @@ fail_alloc_desc:
return r;
}
-void vhost_virtqueue_stop(struct vhost_dev *dev,
- struct VirtIODevice *vdev,
- struct vhost_virtqueue *vq,
- unsigned idx)
+int vhost_virtqueue_stop(struct vhost_dev *dev,
+ struct VirtIODevice *vdev,
+ struct vhost_virtqueue *vq,
+ unsigned idx)
{
int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx);
struct vhost_vring_state state = {
@@ -1307,7 +1380,7 @@ void vhost_virtqueue_stop(struct vhost_dev *dev,
if (virtio_queue_get_desc_addr(vdev, idx) == 0) {
/* Don't stop the virtqueue which might have not been started */
- return;
+ return 0;
}
r = dev->vhost_ops->vhost_get_vring_base(dev, &state);
@@ -1338,6 +1411,7 @@ void vhost_virtqueue_stop(struct vhost_dev *dev,
0, virtio_queue_get_avail_size(vdev, idx));
vhost_memory_unmap(dev, vq->desc, virtio_queue_get_desc_size(vdev, idx),
0, virtio_queue_get_desc_size(vdev, idx));
+ return r;
}
static int vhost_virtqueue_set_busyloop_timeout(struct vhost_dev *dev,
@@ -1608,9 +1682,9 @@ void vhost_dev_cleanup(struct vhost_dev *hdev)
memset(hdev, 0, sizeof(struct vhost_dev));
}
-static void vhost_dev_disable_notifiers_nvqs(struct vhost_dev *hdev,
- VirtIODevice *vdev,
- unsigned int nvqs)
+void vhost_dev_disable_notifiers_nvqs(struct vhost_dev *hdev,
+ VirtIODevice *vdev,
+ unsigned int nvqs)
{
BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
int i, r;
@@ -1856,62 +1930,6 @@ void vhost_dev_free_inflight(struct vhost_inflight *inflight)
}
}
-static int vhost_dev_resize_inflight(struct vhost_inflight *inflight,
- uint64_t new_size)
-{
- Error *err = NULL;
- int fd = -1;
- void *addr = qemu_memfd_alloc("vhost-inflight", new_size,
- F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
- &fd, &err);
-
- if (err) {
- error_report_err(err);
- return -ENOMEM;
- }
-
- vhost_dev_free_inflight(inflight);
- inflight->offset = 0;
- inflight->addr = addr;
- inflight->fd = fd;
- inflight->size = new_size;
-
- return 0;
-}
-
-void vhost_dev_save_inflight(struct vhost_inflight *inflight, QEMUFile *f)
-{
- if (inflight->addr) {
- qemu_put_be64(f, inflight->size);
- qemu_put_be16(f, inflight->queue_size);
- qemu_put_buffer(f, inflight->addr, inflight->size);
- } else {
- qemu_put_be64(f, 0);
- }
-}
-
-int vhost_dev_load_inflight(struct vhost_inflight *inflight, QEMUFile *f)
-{
- uint64_t size;
-
- size = qemu_get_be64(f);
- if (!size) {
- return 0;
- }
-
- if (inflight->size != size) {
- int ret = vhost_dev_resize_inflight(inflight, size);
- if (ret < 0) {
- return ret;
- }
- }
- inflight->queue_size = qemu_get_be16(f);
-
- qemu_get_buffer(f, inflight->addr, size);
-
- return 0;
-}
-
int vhost_dev_prepare_inflight(struct vhost_dev *hdev, VirtIODevice *vdev)
{
int r;
@@ -2044,7 +2062,8 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
uint64_t log_base;
hdev->log_size = vhost_get_log_size(hdev);
- hdev->log = vhost_log_get(hdev->log_size,
+ hdev->log = vhost_log_get(hdev->vhost_ops->backend_type,
+ hdev->log_size,
vhost_dev_log_is_shared(hdev));
log_base = (uintptr_t)hdev->log->log;
r = hdev->vhost_ops->vhost_set_log_base(hdev,
@@ -2054,6 +2073,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
VHOST_OPS_DEBUG(r, "vhost_set_log_base failed");
goto fail_log;
}
+ vhost_dev_elect_mem_logger(hdev, true);
}
if (vrings) {
r = vhost_dev_set_vring_enable(hdev, true);
@@ -2075,11 +2095,22 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
* vhost-kernel code requires for this.*/
for (i = 0; i < hdev->nvqs; ++i) {
struct vhost_virtqueue *vq = hdev->vqs + i;
- vhost_device_iotlb_miss(hdev, vq->used_phys, true);
+ r = vhost_device_iotlb_miss(hdev, vq->used_phys, true);
+ if (r) {
+ goto fail_iotlb;
+ }
}
}
vhost_start_config_intr(hdev);
return 0;
+fail_iotlb:
+ if (vhost_dev_has_iommu(hdev) &&
+ hdev->vhost_ops->vhost_set_iotlb_callback) {
+ hdev->vhost_ops->vhost_set_iotlb_callback(hdev, false);
+ }
+ if (hdev->vhost_ops->vhost_dev_start) {
+ hdev->vhost_ops->vhost_dev_start(hdev, false);
+ }
fail_start:
if (vrings) {
vhost_dev_set_vring_enable(hdev, false);
@@ -2105,9 +2136,10 @@ fail_features:
}
/* Host notifiers must be enabled at this point. */
-void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
+int vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
{
int i;
+ int rc = 0;
/* should only be called after backend is connected */
assert(hdev->vhost_ops);
@@ -2126,10 +2158,10 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
vhost_dev_set_vring_enable(hdev, false);
}
for (i = 0; i < hdev->nvqs; ++i) {
- vhost_virtqueue_stop(hdev,
- vdev,
- hdev->vqs + i,
- hdev->vq_index + i);
+ rc |= vhost_virtqueue_stop(hdev,
+ vdev,
+ hdev->vqs + i,
+ hdev->vq_index + i);
}
if (hdev->vhost_ops->vhost_reset_status) {
hdev->vhost_ops->vhost_reset_status(hdev);
@@ -2146,6 +2178,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
hdev->started = false;
vdev->vhost_started = false;
hdev->vdev = NULL;
+ return rc;
}
int vhost_net_set_backend(struct vhost_dev *hdev,