From f811f97040a48358b456b46ecbc9167f0131034f Mon Sep 17 00:00:00 2001 From: Ladi Prosek Date: Tue, 30 May 2017 10:59:43 +0200 Subject: virtio-serial-bus: Unset hotplug handler when unrealize Virtio serial device controls the lifetime of virtio-serial-bus and virtio-serial-bus links back to the device via its hotplug-handler property. This extra ref-count prevents the device from getting finalized, leaving the VirtIODevice memory listener registered and leading to use-after-free later on. This patch addresses the same issue as Fam Zheng's "virtio-scsi: Unset hotplug handler when unrealize" only for a different virtio device. Cc: qemu-stable@nongnu.org Signed-off-by: Ladi Prosek Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Paolo Bonzini Reviewed-by: Fam Zheng --- hw/char/virtio-serial-bus.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c index d797a67..aa9c11a 100644 --- a/hw/char/virtio-serial-bus.c +++ b/hw/char/virtio-serial-bus.c @@ -1121,6 +1121,9 @@ static void virtio_serial_device_unrealize(DeviceState *dev, Error **errp) timer_free(vser->post_load->timer); g_free(vser->post_load); } + + qbus_set_hotplug_handler(BUS(&vser->bus), NULL, errp); + virtio_cleanup(vdev); } -- cgit v1.1 From b0ac429f1346e9fa13206d748bedc9bd497a55bc Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 1 Jun 2017 15:54:47 +0200 Subject: virtio: add virtqueue_alloc_element tracepoint This tracepoint can help diagnosing failures due to memory fragmentation in the guest. Signed-off-by: Paolo Bonzini Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/trace-events | 1 + hw/virtio/virtio.c | 1 + 2 files changed, 2 insertions(+) diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index 1f7a7c1..e24d8fa 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -1,6 +1,7 @@ # See docs/tracing.txt for syntax documentation. # hw/virtio/virtio.c +virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned out_num) "elem %p size %zd in_num %u out_num %u" virtqueue_fill(void *vq, const void *elem, unsigned int len, unsigned int idx) "vq %p elem %p len %u idx %u" virtqueue_flush(void *vq, unsigned int count) "vq %p count %u" virtqueue_pop(void *vq, void *elem, unsigned int in_num, unsigned int out_num) "vq %p elem %p in_num %u out_num %u" diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index f99d99f..464947f 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -815,6 +815,7 @@ static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_nu assert(sz >= sizeof(VirtQueueElement)); elem = g_malloc(out_sg_end); + trace_virtqueue_alloc_element(elem, sz, in_num, out_num); elem->out_num = out_num; elem->in_num = in_num; elem->in_addr = (void *)elem + in_addr_ofs; -- cgit v1.1 From 46764fe09ca2e0f15c0981a672c166ed8cf57e72 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Fri, 2 Jun 2017 10:54:24 +0100 Subject: virtio-serial: fix segfault on disconnect Since commit d4c19cdeeb2f1e474bc426a6da261f1d7346eb5b ("virtio-serial: add missing virtio_detach_element() call") the following commands may cause QEMU to segfault: $ qemu -M accel=kvm -cpu host -m 1G \ -drive if=virtio,file=test.img,format=raw \ -device virtio-serial-pci,id=virtio-serial0 \ -chardev socket,id=channel1,path=/tmp/chardev.sock,server,nowait \ -device virtserialport,chardev=channel1,bus=virtio-serial0.0,id=port1 $ nc -U /tmp/chardev.sock ^C (guest)$ cat /dev/zero >/dev/vport0p1 The segfault is non-deterministic: if the event loop notices the socket has been closed then there is no crash. The disconnect has to happen right before QEMU attempts to write data to the socket. The backtrace is as follows: Thread 1 "qemu-system-x86" received signal SIGSEGV, Segmentation fault. 0x00005555557e0698 in do_flush_queued_data (port=0x5555582cedf0, vq=0x7fffcc854290, vdev=0x55555807b1d0) at hw/char/virtio-serial-bus.c:180 180 for (i = port->iov_idx; i < port->elem->out_num; i++) { #1 0x000055555580d363 in virtio_queue_notify_vq (vq=0x7fffcc854290) at hw/virtio/virtio.c:1524 #2 0x000055555580d363 in virtio_queue_host_notifier_read (n=0x7fffcc8542f8) at hw/virtio/virtio.c:2430 #3 0x0000555555b3482c in aio_dispatch_handlers (ctx=ctx@entry=0x5555566b8c80) at util/aio-posix.c:399 #4 0x0000555555b350d8 in aio_dispatch (ctx=0x5555566b8c80) at util/aio-posix.c:430 #5 0x0000555555b3212e in aio_ctx_dispatch (source=, callback=, user_data=) at util/async.c:261 #6 0x00007fffde71de52 in g_main_context_dispatch () at /lib64/libglib-2.0.so.0 #7 0x0000555555b34353 in glib_pollfds_poll () at util/main-loop.c:213 #8 0x0000555555b34353 in os_host_main_loop_wait (timeout=) at util/main-loop.c:261 #9 0x0000555555b34353 in main_loop_wait (nonblocking=) at util/main-loop.c:517 #10 0x0000555555773207 in main_loop () at vl.c:1917 #11 0x0000555555773207 in main (argc=, argv=, envp=) at vl.c:4751 The do_flush_queued_data() function does not anticipate chardev close events during vsc->have_data(). It expects port->elem to remain non-NULL for the duration its for loop. The fix is simply to return from do_flush_queued_data() if the port closes because the close event already frees port->elem and drains the virtqueue - there is nothing left for do_flush_queued_data() to do. Reported-by: Sitong Liu Reported-by: Min Deng Signed-off-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/char/virtio-serial-bus.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c index aa9c11a..f5bc173 100644 --- a/hw/char/virtio-serial-bus.c +++ b/hw/char/virtio-serial-bus.c @@ -186,6 +186,9 @@ static void do_flush_queued_data(VirtIOSerialPort *port, VirtQueue *vq, port->elem->out_sg[i].iov_base + port->iov_offset, buf_size); + if (!port->elem) { /* bail if we got disconnected */ + return; + } if (port->throttled) { port->iov_idx = i; if (ret > 0) { -- cgit v1.1 From fc58bd0d97c41dc3257001c86b2f802ae7255dff Mon Sep 17 00:00:00 2001 From: Maxime Coquelin Date: Fri, 2 Jun 2017 12:18:27 +0200 Subject: vhost: propagate errors in vhost_device_iotlb_miss() Some backends might want to know when things went wrong. Signed-off-by: Maxime Coquelin Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost.c | 15 ++++++++++----- include/hw/virtio/vhost.h | 2 +- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 03a46a7..8fab12d 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -971,18 +971,20 @@ static int vhost_memory_region_lookup(struct vhost_dev *hdev, return -EFAULT; } -void vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write) +int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write) { IOMMUTLBEntry iotlb; uint64_t uaddr, len; + int ret = -EFAULT; rcu_read_lock(); iotlb = address_space_get_iotlb_entry(dev->vdev->dma_as, iova, write); if (iotlb.target_as != NULL) { - if (vhost_memory_region_lookup(dev, iotlb.translated_addr, - &uaddr, &len)) { + ret = vhost_memory_region_lookup(dev, iotlb.translated_addr, + &uaddr, &len); + if (ret) { error_report("Fail to lookup the translated address " "%"PRIx64, iotlb.translated_addr); goto out; @@ -991,14 +993,17 @@ void vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write) len = MIN(iotlb.addr_mask + 1, len); iova = iova & ~iotlb.addr_mask; - if (dev->vhost_ops->vhost_update_device_iotlb(dev, iova, uaddr, - len, iotlb.perm)) { + ret = dev->vhost_ops->vhost_update_device_iotlb(dev, iova, uaddr, + len, iotlb.perm); + if (ret) { error_report("Fail to update device iotlb"); goto out; } } out: rcu_read_unlock(); + + return ret; } static int vhost_virtqueue_start(struct vhost_dev *dev, diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index a450321..467dc77 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -105,5 +105,5 @@ bool vhost_has_free_slot(void); int vhost_net_set_backend(struct vhost_dev *hdev, struct vhost_vring_file *file); -void vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write); +int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write); #endif -- cgit v1.1 From 020e571b8bf90e022bbb78346e189f0f26e4675f Mon Sep 17 00:00:00 2001 From: Maxime Coquelin Date: Fri, 2 Jun 2017 12:18:28 +0200 Subject: vhost: rework IOTLB messaging This patch reworks IOTLB messaging to prepare for vhost-user device IOTLB support. IOTLB messages handling is extracted from vhost-kernel backend, so that only the messages transport remains backend specifics. Signed-off-by: Maxime Coquelin Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-backend.c | 130 +++++++++++++++++++++----------------- hw/virtio/vhost.c | 8 +-- include/hw/virtio/vhost-backend.h | 23 ++++--- 3 files changed, 92 insertions(+), 69 deletions(-) diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c index be927b8..4e31de1 100644 --- a/hw/virtio/vhost-backend.c +++ b/hw/virtio/vhost-backend.c @@ -192,7 +192,6 @@ static void vhost_kernel_iotlb_read(void *opaque) ssize_t len; while ((len = read((uintptr_t)dev->opaque, &msg, sizeof msg)) > 0) { - struct vhost_iotlb_msg *imsg = &msg.iotlb; if (len < sizeof msg) { error_report("Wrong vhost message len: %d", (int)len); break; @@ -201,70 +200,21 @@ static void vhost_kernel_iotlb_read(void *opaque) error_report("Unknown vhost iotlb message type"); break; } - switch (imsg->type) { - case VHOST_IOTLB_MISS: - vhost_device_iotlb_miss(dev, imsg->iova, - imsg->perm != VHOST_ACCESS_RO); - break; - case VHOST_IOTLB_UPDATE: - case VHOST_IOTLB_INVALIDATE: - error_report("Unexpected IOTLB message type"); - break; - case VHOST_IOTLB_ACCESS_FAIL: - /* FIXME: report device iotlb error */ - break; - default: - break; - } - } -} -static int vhost_kernel_update_device_iotlb(struct vhost_dev *dev, - uint64_t iova, uint64_t uaddr, - uint64_t len, - IOMMUAccessFlags perm) -{ - struct vhost_msg msg; - msg.type = VHOST_IOTLB_MSG; - msg.iotlb.iova = iova; - msg.iotlb.uaddr = uaddr; - msg.iotlb.size = len; - msg.iotlb.type = VHOST_IOTLB_UPDATE; - - switch (perm) { - case IOMMU_RO: - msg.iotlb.perm = VHOST_ACCESS_RO; - break; - case IOMMU_WO: - msg.iotlb.perm = VHOST_ACCESS_WO; - break; - case IOMMU_RW: - msg.iotlb.perm = VHOST_ACCESS_RW; - break; - default: - g_assert_not_reached(); - } - - if (write((uintptr_t)dev->opaque, &msg, sizeof msg) != sizeof msg) { - error_report("Fail to update device iotlb"); - return -EFAULT; + vhost_backend_handle_iotlb_msg(dev, &msg.iotlb); } - - return 0; } -static int vhost_kernel_invalidate_device_iotlb(struct vhost_dev *dev, - uint64_t iova, uint64_t len) +static int vhost_kernel_send_device_iotlb_msg(struct vhost_dev *dev, + struct vhost_iotlb_msg *imsg) { struct vhost_msg msg; msg.type = VHOST_IOTLB_MSG; - msg.iotlb.iova = iova; - msg.iotlb.size = len; - msg.iotlb.type = VHOST_IOTLB_INVALIDATE; + msg.iotlb = *imsg; if (write((uintptr_t)dev->opaque, &msg, sizeof msg) != sizeof msg) { - error_report("Fail to invalidate device iotlb"); + error_report("Fail to update device iotlb"); return -EFAULT; } @@ -311,8 +261,7 @@ static const VhostOps kernel_ops = { .vhost_vsock_set_running = vhost_kernel_vsock_set_running, #endif /* CONFIG_VHOST_VSOCK */ .vhost_set_iotlb_callback = vhost_kernel_set_iotlb_callback, - .vhost_update_device_iotlb = vhost_kernel_update_device_iotlb, - .vhost_invalidate_device_iotlb = vhost_kernel_invalidate_device_iotlb, + .vhost_send_device_iotlb_msg = vhost_kernel_send_device_iotlb_msg, }; int vhost_set_backend_type(struct vhost_dev *dev, VhostBackendType backend_type) @@ -333,3 +282,70 @@ int vhost_set_backend_type(struct vhost_dev *dev, VhostBackendType backend_type) return r; } + +int vhost_backend_update_device_iotlb(struct vhost_dev *dev, + uint64_t iova, uint64_t uaddr, + uint64_t len, + IOMMUAccessFlags perm) +{ + struct vhost_iotlb_msg imsg; + + imsg.iova = iova; + imsg.uaddr = uaddr; + imsg.size = len; + imsg.type = VHOST_IOTLB_UPDATE; + + switch (perm) { + case IOMMU_RO: + imsg.perm = VHOST_ACCESS_RO; + break; + case IOMMU_WO: + imsg.perm = VHOST_ACCESS_WO; + break; + case IOMMU_RW: + imsg.perm = VHOST_ACCESS_RW; + break; + default: + return -EINVAL; + } + + return dev->vhost_ops->vhost_send_device_iotlb_msg(dev, &imsg); +} + +int vhost_backend_invalidate_device_iotlb(struct vhost_dev *dev, + uint64_t iova, uint64_t len) +{ + struct vhost_iotlb_msg imsg; + + imsg.iova = iova; + imsg.size = len; + imsg.type = VHOST_IOTLB_INVALIDATE; + + return dev->vhost_ops->vhost_send_device_iotlb_msg(dev, &imsg); +} + +int vhost_backend_handle_iotlb_msg(struct vhost_dev *dev, + struct vhost_iotlb_msg *imsg) +{ + int ret = 0; + + switch (imsg->type) { + case VHOST_IOTLB_MISS: + ret = vhost_device_iotlb_miss(dev, imsg->iova, + imsg->perm != VHOST_ACCESS_RO); + break; + case VHOST_IOTLB_ACCESS_FAIL: + /* FIXME: report device iotlb error */ + error_report("Access failure IOTLB message type not supported"); + ret = -ENOTSUP; + break; + case VHOST_IOTLB_UPDATE: + case VHOST_IOTLB_INVALIDATE: + default: + error_report("Unexpected IOTLB message type"); + ret = -EINVAL; + break; + } + + return ret; +} diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 8fab12d..6eddb09 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -724,8 +724,8 @@ static void vhost_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) struct vhost_dev *hdev = iommu->hdev; hwaddr iova = iotlb->iova + iommu->iommu_offset; - if (hdev->vhost_ops->vhost_invalidate_device_iotlb(hdev, iova, - iotlb->addr_mask + 1)) { + if (vhost_backend_invalidate_device_iotlb(hdev, iova, + iotlb->addr_mask + 1)) { error_report("Fail to invalidate device iotlb"); } } @@ -993,8 +993,8 @@ int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write) len = MIN(iotlb.addr_mask + 1, len); iova = iova & ~iotlb.addr_mask; - ret = dev->vhost_ops->vhost_update_device_iotlb(dev, iova, uaddr, - len, iotlb.perm); + ret = vhost_backend_update_device_iotlb(dev, iova, uaddr, + len, iotlb.perm); if (ret) { error_report("Fail to update device iotlb"); goto out; diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h index c3cf4a7..a7a5f22 100644 --- a/include/hw/virtio/vhost-backend.h +++ b/include/hw/virtio/vhost-backend.h @@ -27,6 +27,7 @@ struct vhost_vring_file; struct vhost_vring_state; struct vhost_vring_addr; struct vhost_scsi_target; +struct vhost_iotlb_msg; typedef int (*vhost_backend_init)(struct vhost_dev *dev, void *opaque); typedef int (*vhost_backend_cleanup)(struct vhost_dev *dev); @@ -81,12 +82,8 @@ typedef int (*vhost_vsock_set_guest_cid_op)(struct vhost_dev *dev, typedef int (*vhost_vsock_set_running_op)(struct vhost_dev *dev, int start); typedef void (*vhost_set_iotlb_callback_op)(struct vhost_dev *dev, int enabled); -typedef int (*vhost_update_device_iotlb_op)(struct vhost_dev *dev, - uint64_t iova, uint64_t uaddr, - uint64_t len, - IOMMUAccessFlags perm); -typedef int (*vhost_invalidate_device_iotlb_op)(struct vhost_dev *dev, - uint64_t iova, uint64_t len); +typedef int (*vhost_send_device_iotlb_msg_op)(struct vhost_dev *dev, + struct vhost_iotlb_msg *imsg); typedef struct VhostOps { VhostBackendType backend_type; @@ -120,8 +117,7 @@ typedef struct VhostOps { vhost_vsock_set_guest_cid_op vhost_vsock_set_guest_cid; vhost_vsock_set_running_op vhost_vsock_set_running; vhost_set_iotlb_callback_op vhost_set_iotlb_callback; - vhost_update_device_iotlb_op vhost_update_device_iotlb; - vhost_invalidate_device_iotlb_op vhost_invalidate_device_iotlb; + vhost_send_device_iotlb_msg_op vhost_send_device_iotlb_msg; } VhostOps; extern const VhostOps user_ops; @@ -129,4 +125,15 @@ extern const VhostOps user_ops; int vhost_set_backend_type(struct vhost_dev *dev, VhostBackendType backend_type); +int vhost_backend_update_device_iotlb(struct vhost_dev *dev, + uint64_t iova, uint64_t uaddr, + uint64_t len, + IOMMUAccessFlags perm); + +int vhost_backend_invalidate_device_iotlb(struct vhost_dev *dev, + uint64_t iova, uint64_t len); + +int vhost_backend_handle_iotlb_msg(struct vhost_dev *dev, + struct vhost_iotlb_msg *imsg); + #endif /* VHOST_BACKEND_H */ -- cgit v1.1 From 2152f3fead5ddaf7bdbe370f9b87713eae683b75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Fri, 2 Jun 2017 12:18:29 +0200 Subject: vhost-user: add vhost_user to hold the chr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Next patches will add more fields to the structure Signed-off-by: Marc-André Lureau Signed-off-by: Maxime Coquelin Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-user.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index dde094a..bd13b23 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -110,6 +110,10 @@ static VhostUserMsg m __attribute__ ((unused)); /* The version of the protocol we support */ #define VHOST_USER_VERSION (0x1) +struct vhost_user { + CharBackend *chr; +}; + static bool ioeventfd_enabled(void) { return kvm_enabled() && kvm_eventfds_enabled(); @@ -117,7 +121,8 @@ static bool ioeventfd_enabled(void) static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) { - CharBackend *chr = dev->opaque; + struct vhost_user *u = dev->opaque; + CharBackend *chr = u->chr; uint8_t *p = (uint8_t *) msg; int r, size = VHOST_USER_HDR_SIZE; @@ -202,7 +207,8 @@ static bool vhost_user_one_time_request(VhostUserRequest request) static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, int *fds, int fd_num) { - CharBackend *chr = dev->opaque; + struct vhost_user *u = dev->opaque; + CharBackend *chr = u->chr; int ret, size = VHOST_USER_HDR_SIZE + msg->size; /* @@ -575,11 +581,14 @@ static int vhost_user_reset_device(struct vhost_dev *dev) static int vhost_user_init(struct vhost_dev *dev, void *opaque) { uint64_t features; + struct vhost_user *u; int err; assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); - dev->opaque = opaque; + u = g_new0(struct vhost_user, 1); + u->chr = opaque; + dev->opaque = u; err = vhost_user_get_features(dev, &features); if (err < 0) { @@ -624,8 +633,12 @@ static int vhost_user_init(struct vhost_dev *dev, void *opaque) static int vhost_user_cleanup(struct vhost_dev *dev) { + struct vhost_user *u; + assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); + u = dev->opaque; + g_free(u); dev->opaque = 0; return 0; -- cgit v1.1 From 4bbeeba023f22c117d7a4c561354b91a0bf62e42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= Date: Fri, 2 Jun 2017 12:18:30 +0200 Subject: vhost-user: add slave-req-fd support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Learn to give a socket to the slave to let him make requests to the master. Signed-off-by: Marc-André Lureau Signed-off-by: Maxime Coquelin Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- docs/specs/vhost-user.txt | 32 +++++++++++- hw/virtio/vhost-user.c | 127 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 157 insertions(+), 2 deletions(-) diff --git a/docs/specs/vhost-user.txt b/docs/specs/vhost-user.txt index 036890f..5fa7016 100644 --- a/docs/specs/vhost-user.txt +++ b/docs/specs/vhost-user.txt @@ -139,6 +139,7 @@ in the ancillary data: * VHOST_USER_SET_VRING_KICK * VHOST_USER_SET_VRING_CALL * VHOST_USER_SET_VRING_ERR + * VHOST_USER_SET_SLAVE_REQ_FD If Master is unable to send the full message or receives a wrong reply it will close the connection. An optional reconnection mechanism can be implemented. @@ -252,6 +253,18 @@ Once the source has finished migration, rings will be stopped by the source. No further update must be done before rings are restarted. +Slave communication +------------------- + +An optional communication channel is provided if the slave declares +VHOST_USER_PROTOCOL_F_SLAVE_REQ protocol feature, to allow the slave to make +requests to the master. + +The fd is provided via VHOST_USER_SET_SLAVE_REQ_FD ancillary data. + +A slave may then send VHOST_USER_SLAVE_* messages to the master +using this fd communication channel. + Protocol features ----------------- @@ -260,9 +273,10 @@ Protocol features #define VHOST_USER_PROTOCOL_F_RARP 2 #define VHOST_USER_PROTOCOL_F_REPLY_ACK 3 #define VHOST_USER_PROTOCOL_F_MTU 4 +#define VHOST_USER_PROTOCOL_F_SLAVE_REQ 5 -Message types -------------- +Master message types +-------------------- * VHOST_USER_GET_FEATURES @@ -486,6 +500,20 @@ Message types If VHOST_USER_PROTOCOL_F_REPLY_ACK is negotiated, slave must respond with zero in case the specified MTU is valid, or non-zero otherwise. + * VHOST_USER_SET_SLAVE_REQ_FD + + Id: 21 + Equivalent ioctl: N/A + Master payload: N/A + + Set the socket file descriptor for slave initiated requests. It is passed + in the ancillary data. + This request should be sent only when VHOST_USER_F_PROTOCOL_FEATURES + has been negotiated, and protocol feature bit VHOST_USER_PROTOCOL_F_SLAVE_REQ + bit is present in VHOST_USER_GET_PROTOCOL_FEATURES. + If VHOST_USER_PROTOCOL_F_REPLY_ACK is negotiated, slave must respond + with zero for success, non-zero otherwise. + VHOST_USER_PROTOCOL_F_REPLY_ACK: ------------------------------- The original vhost-user specification only demands replies for certain diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index bd13b23..6a35600 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -32,6 +32,7 @@ enum VhostUserProtocolFeature { VHOST_USER_PROTOCOL_F_RARP = 2, VHOST_USER_PROTOCOL_F_REPLY_ACK = 3, VHOST_USER_PROTOCOL_F_NET_MTU = 4, + VHOST_USER_PROTOCOL_F_SLAVE_REQ = 5, VHOST_USER_PROTOCOL_F_MAX }; @@ -60,9 +61,15 @@ typedef enum VhostUserRequest { VHOST_USER_SET_VRING_ENABLE = 18, VHOST_USER_SEND_RARP = 19, VHOST_USER_NET_SET_MTU = 20, + VHOST_USER_SET_SLAVE_REQ_FD = 21, VHOST_USER_MAX } VhostUserRequest; +typedef enum VhostUserSlaveRequest { + VHOST_USER_SLAVE_NONE = 0, + VHOST_USER_SLAVE_MAX +} VhostUserSlaveRequest; + typedef struct VhostUserMemoryRegion { uint64_t guest_phys_addr; uint64_t memory_size; @@ -112,6 +119,7 @@ static VhostUserMsg m __attribute__ ((unused)); struct vhost_user { CharBackend *chr; + int slave_fd; }; static bool ioeventfd_enabled(void) @@ -578,6 +586,115 @@ static int vhost_user_reset_device(struct vhost_dev *dev) return 0; } +static void slave_read(void *opaque) +{ + struct vhost_dev *dev = opaque; + struct vhost_user *u = dev->opaque; + VhostUserMsg msg = { 0, }; + int size, ret = 0; + + /* Read header */ + size = read(u->slave_fd, &msg, VHOST_USER_HDR_SIZE); + if (size != VHOST_USER_HDR_SIZE) { + error_report("Failed to read from slave."); + goto err; + } + + if (msg.size > VHOST_USER_PAYLOAD_SIZE) { + error_report("Failed to read msg header." + " Size %d exceeds the maximum %zu.", msg.size, + VHOST_USER_PAYLOAD_SIZE); + goto err; + } + + /* Read payload */ + size = read(u->slave_fd, &msg.payload, msg.size); + if (size != msg.size) { + error_report("Failed to read payload from slave."); + goto err; + } + + switch (msg.request) { + default: + error_report("Received unexpected msg type."); + ret = -EINVAL; + } + + /* + * REPLY_ACK feature handling. Other reply types has to be managed + * directly in their request handlers. + */ + if (msg.flags & VHOST_USER_NEED_REPLY_MASK) { + msg.flags &= ~VHOST_USER_NEED_REPLY_MASK; + msg.flags |= VHOST_USER_REPLY_MASK; + + msg.payload.u64 = !!ret; + msg.size = sizeof(msg.payload.u64); + + size = write(u->slave_fd, &msg, VHOST_USER_HDR_SIZE + msg.size); + if (size != VHOST_USER_HDR_SIZE + msg.size) { + error_report("Failed to send msg reply to slave."); + goto err; + } + } + + return; + +err: + qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL); + close(u->slave_fd); + u->slave_fd = -1; + return; +} + +static int vhost_setup_slave_channel(struct vhost_dev *dev) +{ + VhostUserMsg msg = { + .request = VHOST_USER_SET_SLAVE_REQ_FD, + .flags = VHOST_USER_VERSION, + }; + struct vhost_user *u = dev->opaque; + int sv[2], ret = 0; + bool reply_supported = virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_REPLY_ACK); + + if (!virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_SLAVE_REQ)) { + return 0; + } + + if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) { + error_report("socketpair() failed"); + return -1; + } + + u->slave_fd = sv[0]; + qemu_set_fd_handler(u->slave_fd, slave_read, NULL, dev); + + if (reply_supported) { + msg.flags |= VHOST_USER_NEED_REPLY_MASK; + } + + ret = vhost_user_write(dev, &msg, &sv[1], 1); + if (ret) { + goto out; + } + + if (reply_supported) { + ret = process_message_reply(dev, &msg); + } + +out: + close(sv[1]); + if (ret) { + qemu_set_fd_handler(u->slave_fd, NULL, NULL, NULL); + close(u->slave_fd); + u->slave_fd = -1; + } + + return ret; +} + static int vhost_user_init(struct vhost_dev *dev, void *opaque) { uint64_t features; @@ -588,6 +705,7 @@ static int vhost_user_init(struct vhost_dev *dev, void *opaque) u = g_new0(struct vhost_user, 1); u->chr = opaque; + u->slave_fd = -1; dev->opaque = u; err = vhost_user_get_features(dev, &features); @@ -628,6 +746,11 @@ static int vhost_user_init(struct vhost_dev *dev, void *opaque) "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature."); } + err = vhost_setup_slave_channel(dev); + if (err < 0) { + return err; + } + return 0; } @@ -638,6 +761,10 @@ static int vhost_user_cleanup(struct vhost_dev *dev) assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); u = dev->opaque; + if (u->slave_fd >= 0) { + close(u->slave_fd); + u->slave_fd = -1; + } g_free(u); dev->opaque = 0; -- cgit v1.1 From 6dcdd06e3b0d0c5651219013ec975348e2050041 Mon Sep 17 00:00:00 2001 From: Maxime Coquelin Date: Fri, 2 Jun 2017 12:18:31 +0200 Subject: spec/vhost-user spec: Add IOMMU support This patch specifies and implements the master/slave communication to support device IOTLB in slave. The vhost_iotlb_msg structure introduced for kernel backends is re-used, making the design close between the two backends. An exception is the use of the secondary channel to enable the slave to send IOTLB miss requests to the master. Signed-off-by: Maxime Coquelin Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- docs/specs/vhost-user.txt | 84 +++++++++++++++++++++++++++++++++++++++++++++++ hw/net/vhost_net.c | 1 + hw/virtio/vhost-user.c | 48 +++++++++++++++++++++++++-- 3 files changed, 130 insertions(+), 3 deletions(-) diff --git a/docs/specs/vhost-user.txt b/docs/specs/vhost-user.txt index 5fa7016..481ab56 100644 --- a/docs/specs/vhost-user.txt +++ b/docs/specs/vhost-user.txt @@ -97,6 +97,25 @@ Depending on the request type, payload can be: log offset: offset from start of supplied file descriptor where logging starts (i.e. where guest address 0 would be logged) + * An IOTLB message + --------------------------------------------------------- + | iova | size | user address | permissions flags | type | + --------------------------------------------------------- + + IOVA: a 64-bit I/O virtual address programmed by the guest + Size: a 64-bit size + User address: a 64-bit user address + Permissions: a 8-bit value: + - 0: No access + - 1: Read access + - 2: Write access + - 3: Read/Write access + Type: a 8-bit IOTLB message type: + - 1: IOTLB miss + - 2: IOTLB update + - 3: IOTLB invalidate + - 4: IOTLB access fail + In QEMU the vhost-user message is implemented with the following struct: typedef struct VhostUserMsg { @@ -109,6 +128,7 @@ typedef struct VhostUserMsg { struct vhost_vring_addr addr; VhostUserMemory memory; VhostUserLog log; + struct vhost_iotlb_msg iotlb; }; } QEMU_PACKED VhostUserMsg; @@ -253,6 +273,38 @@ Once the source has finished migration, rings will be stopped by the source. No further update must be done before rings are restarted. +IOMMU support +------------- + +When the VIRTIO_F_IOMMU_PLATFORM feature has been negotiated, the master +sends IOTLB entries update & invalidation by sending VHOST_USER_IOTLB_MSG +requests to the slave with a struct vhost_iotlb_msg as payload. For update +events, the iotlb payload has to be filled with the update message type (2), +the I/O virtual address, the size, the user virtual address, and the +permissions flags. Addresses and size must be within vhost memory regions set +via the VHOST_USER_SET_MEM_TABLE request. For invalidation events, the iotlb +payload has to be filled with the invalidation message type (3), the I/O virtual +address and the size. On success, the slave is expected to reply with a zero +payload, non-zero otherwise. + +The slave relies on the slave communcation channel (see "Slave communication" +section below) to send IOTLB miss and access failure events, by sending +VHOST_USER_SLAVE_IOTLB_MSG requests to the master with a struct vhost_iotlb_msg +as payload. For miss events, the iotlb payload has to be filled with the miss +message type (1), the I/O virtual address and the permissions flags. For access +failure event, the iotlb payload has to be filled with the access failure +message type (4), the I/O virtual address and the permissions flags. +For synchronization purpose, the slave may rely on the reply-ack feature, +so the master may send a reply when operation is completed if the reply-ack +feature is negotiated and slaves requests a reply. For miss events, completed +operation means either master sent an update message containing the IOTLB entry +containing requested address and permission, or master sent nothing if the IOTLB +miss message is invalid (invalid IOVA or permission). + +The master isn't expected to take the initiative to send IOTLB update messages, +as the slave sends IOTLB miss messages for the guest virtual memory areas it +needs to access. + Slave communication ------------------- @@ -514,6 +566,38 @@ Master message types If VHOST_USER_PROTOCOL_F_REPLY_ACK is negotiated, slave must respond with zero for success, non-zero otherwise. + * VHOST_USER_IOTLB_MSG + + Id: 22 + Equivalent ioctl: N/A (equivalent to VHOST_IOTLB_MSG message type) + Master payload: struct vhost_iotlb_msg + Slave payload: u64 + + Send IOTLB messages with struct vhost_iotlb_msg as payload. + Master sends such requests to update and invalidate entries in the device + IOTLB. The slave has to acknowledge the request with sending zero as u64 + payload for success, non-zero otherwise. + This request should be send only when VIRTIO_F_IOMMU_PLATFORM feature + has been successfully negotiated. + +Slave message types +------------------- + + * VHOST_USER_SLAVE_IOTLB_MSG + + Id: 1 + Equivalent ioctl: N/A (equivalent to VHOST_IOTLB_MSG message type) + Slave payload: struct vhost_iotlb_msg + Master payload: N/A + + Send IOTLB messages with struct vhost_iotlb_msg as payload. + Slave sends such requests to notify of an IOTLB miss, or an IOTLB + access failure. If VHOST_USER_PROTOCOL_F_REPLY_ACK is negotiated, + and slave set the VHOST_USER_NEED_REPLY flag, master must respond with + zero when operation is successfully completed, or non-zero otherwise. + This request should be send only when VIRTIO_F_IOMMU_PLATFORM feature + has been successfully negotiated. + VHOST_USER_PROTOCOL_F_REPLY_ACK: ------------------------------- The original vhost-user specification only demands replies for certain diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index 22874a9..e037db6 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -77,6 +77,7 @@ static const int user_feature_bits[] = { VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_MTU, + VIRTIO_F_IOMMU_PLATFORM, /* This bit implies RARP isn't sent by QEMU out of band */ VIRTIO_NET_F_GUEST_ANNOUNCE, diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 6a35600..7a9bb1d 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -62,11 +62,13 @@ typedef enum VhostUserRequest { VHOST_USER_SEND_RARP = 19, VHOST_USER_NET_SET_MTU = 20, VHOST_USER_SET_SLAVE_REQ_FD = 21, + VHOST_USER_IOTLB_MSG = 22, VHOST_USER_MAX } VhostUserRequest; typedef enum VhostUserSlaveRequest { VHOST_USER_SLAVE_NONE = 0, + VHOST_USER_SLAVE_IOTLB_MSG = 1, VHOST_USER_SLAVE_MAX } VhostUserSlaveRequest; @@ -104,6 +106,7 @@ typedef struct VhostUserMsg { struct vhost_vring_addr addr; VhostUserMemory memory; VhostUserLog log; + struct vhost_iotlb_msg iotlb; } payload; } QEMU_PACKED VhostUserMsg; @@ -615,6 +618,9 @@ static void slave_read(void *opaque) } switch (msg.request) { + case VHOST_USER_SLAVE_IOTLB_MSG: + ret = vhost_backend_handle_iotlb_msg(dev, &msg.payload.iotlb); + break; default: error_report("Received unexpected msg type."); ret = -EINVAL; @@ -697,7 +703,7 @@ out: static int vhost_user_init(struct vhost_dev *dev, void *opaque) { - uint64_t features; + uint64_t features, protocol_features; struct vhost_user *u; int err; @@ -717,12 +723,13 @@ static int vhost_user_init(struct vhost_dev *dev, void *opaque) dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES, - &features); + &protocol_features); if (err < 0) { return err; } - dev->protocol_features = features & VHOST_USER_PROTOCOL_FEATURE_MASK; + dev->protocol_features = + protocol_features & VHOST_USER_PROTOCOL_FEATURE_MASK; err = vhost_user_set_protocol_features(dev, dev->protocol_features); if (err < 0) { return err; @@ -736,6 +743,16 @@ static int vhost_user_init(struct vhost_dev *dev, void *opaque) return err; } } + + if (virtio_has_feature(features, VIRTIO_F_IOMMU_PLATFORM) && + !(virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_SLAVE_REQ) && + virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_REPLY_ACK))) { + error_report("IOMMU support requires reply-ack and " + "slave-req protocol features."); + return -1; + } } if (dev->migration_blocker == NULL && @@ -862,6 +879,29 @@ static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu) return 0; } +static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev, + struct vhost_iotlb_msg *imsg) +{ + VhostUserMsg msg = { + .request = VHOST_USER_IOTLB_MSG, + .size = sizeof(msg.payload.iotlb), + .flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, + .payload.iotlb = *imsg, + }; + + if (vhost_user_write(dev, &msg, NULL, 0) < 0) { + return -EFAULT; + } + + return process_message_reply(dev, &msg); +} + + +static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled) +{ + /* No-op as the receive channel is not dedicated to IOTLB messages. */ +} + const VhostOps user_ops = { .backend_type = VHOST_BACKEND_TYPE_USER, .vhost_backend_init = vhost_user_init, @@ -886,4 +926,6 @@ const VhostOps user_ops = { .vhost_migration_done = vhost_user_migration_done, .vhost_backend_can_merge = vhost_user_can_merge, .vhost_net_set_mtu = vhost_user_net_set_mtu, + .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback, + .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg, }; -- cgit v1.1