diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2015-10-22 12:41:44 +0100 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2015-10-22 12:41:44 +0100 |
commit | ca3e40e233e87f7b29442311736a82da01c0df7b (patch) | |
tree | dba688d01dceded9b998e5d5cfb8cb9264354604 /hw/virtio | |
parent | c1bd8997438f1b556acfeab1d52245ff7cc680c0 (diff) | |
parent | 3c23402d4032f69af44a87fdb8019ad3229a4f31 (diff) | |
download | qemu-ca3e40e233e87f7b29442311736a82da01c0df7b.zip qemu-ca3e40e233e87f7b29442311736a82da01c0df7b.tar.gz qemu-ca3e40e233e87f7b29442311736a82da01c0df7b.tar.bz2 |
Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging
vhost, pc, virtio features, fixes, cleanups
New features:
VT-d support for devices behind a bridge
vhost-user migration support
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
# gpg: Signature made Thu 22 Oct 2015 12:39:19 BST using RSA key ID D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg: aka "Michael S. Tsirkin <mst@redhat.com>"
* remotes/mst/tags/for_upstream: (37 commits)
hw/isa/lpc_ich9: inject the SMI on the VCPU that is writing to APM_CNT
i386: keep cpu_model field in MachineState uptodate
vhost: set the correct queue index in case of migration with multiqueue
piix: fix resource leak reported by Coverity
seccomp: add memfd_create to whitelist
vhost-user-test: check ownership during migration
vhost-user-test: add live-migration test
vhost-user-test: learn to tweak various qemu arguments
vhost-user-test: wrap server in TestServer struct
vhost-user-test: remove useless static check
vhost-user-test: move wait_for_fds() out
vhost: add migration block if memfd failed
vhost-user: use an enum helper for features mask
vhost user: add rarp sending after live migration for legacy guest
vhost user: add support of live migration
net: add trace_vhost_user_event
vhost-user: document migration log
vhost: use a function for each call
vhost-user: add a migration blocker
vhost-user: send log shm fd along with log_base
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Diffstat (limited to 'hw/virtio')
-rw-r--r-- | hw/virtio/vhost-backend.c | 138 | ||||
-rw-r--r-- | hw/virtio/vhost-user.c | 581 | ||||
-rw-r--r-- | hw/virtio/vhost.c | 156 |
3 files changed, 632 insertions, 243 deletions
diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c index 72d1392..1d5f684 100644 --- a/hw/virtio/vhost-backend.c +++ b/hw/virtio/vhost-backend.c @@ -11,6 +11,7 @@ #include "hw/virtio/vhost.h" #include "hw/virtio/vhost-backend.h" #include "qemu/error-report.h" +#include "linux/vhost.h" #include <sys/ioctl.h> @@ -42,6 +43,122 @@ static int vhost_kernel_cleanup(struct vhost_dev *dev) return close(fd); } +static int vhost_kernel_memslots_limit(struct vhost_dev *dev) +{ + int limit = 64; + char *s; + + if (g_file_get_contents("/sys/module/vhost/parameters/max_mem_regions", + &s, NULL, NULL)) { + uint64_t val = g_ascii_strtoull(s, NULL, 10); + if (!((val == G_MAXUINT64 || !val) && errno)) { + return val; + } + error_report("ignoring invalid max_mem_regions value in vhost module:" + " %s", s); + } + return limit; +} + +static int vhost_kernel_net_set_backend(struct vhost_dev *dev, + struct vhost_vring_file *file) +{ + return vhost_kernel_call(dev, VHOST_NET_SET_BACKEND, file); +} + +static int vhost_kernel_scsi_set_endpoint(struct vhost_dev *dev, + struct vhost_scsi_target *target) +{ + return vhost_kernel_call(dev, VHOST_SCSI_SET_ENDPOINT, target); +} + +static int vhost_kernel_scsi_clear_endpoint(struct vhost_dev *dev, + struct vhost_scsi_target *target) +{ + return vhost_kernel_call(dev, VHOST_SCSI_CLEAR_ENDPOINT, target); +} + +static int vhost_kernel_scsi_get_abi_version(struct vhost_dev *dev, int *version) +{ + return vhost_kernel_call(dev, VHOST_SCSI_GET_ABI_VERSION, version); +} + +static int vhost_kernel_set_log_base(struct vhost_dev *dev, uint64_t base, + struct vhost_log *log) +{ + return vhost_kernel_call(dev, VHOST_SET_LOG_BASE, &base); +} + +static int vhost_kernel_set_mem_table(struct vhost_dev *dev, + struct vhost_memory *mem) +{ + return vhost_kernel_call(dev, VHOST_SET_MEM_TABLE, mem); +} + +static int vhost_kernel_set_vring_addr(struct vhost_dev *dev, + struct vhost_vring_addr *addr) +{ + return vhost_kernel_call(dev, VHOST_SET_VRING_ADDR, addr); +} + +static int vhost_kernel_set_vring_endian(struct vhost_dev *dev, + struct vhost_vring_state *ring) +{ + return vhost_kernel_call(dev, VHOST_SET_VRING_ENDIAN, ring); +} + +static int vhost_kernel_set_vring_num(struct vhost_dev *dev, + struct vhost_vring_state *ring) +{ + return vhost_kernel_call(dev, VHOST_SET_VRING_NUM, ring); +} + +static int vhost_kernel_set_vring_base(struct vhost_dev *dev, + struct vhost_vring_state *ring) +{ + return vhost_kernel_call(dev, VHOST_SET_VRING_BASE, ring); +} + +static int vhost_kernel_get_vring_base(struct vhost_dev *dev, + struct vhost_vring_state *ring) +{ + return vhost_kernel_call(dev, VHOST_GET_VRING_BASE, ring); +} + +static int vhost_kernel_set_vring_kick(struct vhost_dev *dev, + struct vhost_vring_file *file) +{ + return vhost_kernel_call(dev, VHOST_SET_VRING_KICK, file); +} + +static int vhost_kernel_set_vring_call(struct vhost_dev *dev, + struct vhost_vring_file *file) +{ + return vhost_kernel_call(dev, VHOST_SET_VRING_CALL, file); +} + +static int vhost_kernel_set_features(struct vhost_dev *dev, + uint64_t features) +{ + return vhost_kernel_call(dev, VHOST_SET_FEATURES, &features); +} + +static int vhost_kernel_get_features(struct vhost_dev *dev, + uint64_t *features) +{ + return vhost_kernel_call(dev, VHOST_GET_FEATURES, features); +} + +static int vhost_kernel_set_owner(struct vhost_dev *dev) +{ + return vhost_kernel_call(dev, VHOST_SET_OWNER, NULL); +} + +static int vhost_kernel_reset_device(struct vhost_dev *dev) +{ + return vhost_kernel_call(dev, VHOST_RESET_DEVICE, NULL); +} + static int vhost_kernel_get_vq_index(struct vhost_dev *dev, int idx) { assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); @@ -51,10 +168,27 @@ static int vhost_kernel_get_vq_index(struct vhost_dev *dev, int idx) static const VhostOps kernel_ops = { .backend_type = VHOST_BACKEND_TYPE_KERNEL, - .vhost_call = vhost_kernel_call, .vhost_backend_init = vhost_kernel_init, .vhost_backend_cleanup = vhost_kernel_cleanup, - .vhost_backend_get_vq_index = vhost_kernel_get_vq_index, + .vhost_backend_memslots_limit = vhost_kernel_memslots_limit, + .vhost_net_set_backend = vhost_kernel_net_set_backend, + .vhost_scsi_set_endpoint = vhost_kernel_scsi_set_endpoint, + .vhost_scsi_clear_endpoint = vhost_kernel_scsi_clear_endpoint, + .vhost_scsi_get_abi_version = vhost_kernel_scsi_get_abi_version, + .vhost_set_log_base = vhost_kernel_set_log_base, + .vhost_set_mem_table = vhost_kernel_set_mem_table, + .vhost_set_vring_addr = vhost_kernel_set_vring_addr, + .vhost_set_vring_endian = vhost_kernel_set_vring_endian, + .vhost_set_vring_num = vhost_kernel_set_vring_num, + .vhost_set_vring_base = vhost_kernel_set_vring_base, + .vhost_get_vring_base = vhost_kernel_get_vring_base, + .vhost_set_vring_kick = vhost_kernel_set_vring_kick, + .vhost_set_vring_call = vhost_kernel_set_vring_call, + .vhost_set_features = vhost_kernel_set_features, + .vhost_get_features = vhost_kernel_get_features, + .vhost_set_owner = vhost_kernel_set_owner, + .vhost_reset_device = vhost_kernel_reset_device, + .vhost_get_vq_index = vhost_kernel_get_vq_index, }; int vhost_set_backend_type(struct vhost_dev *dev, VhostBackendType backend_type) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index b11c0d2..78442ba 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -10,11 +10,13 @@ #include "hw/virtio/vhost.h" #include "hw/virtio/vhost-backend.h" +#include "hw/virtio/virtio-net.h" #include "sysemu/char.h" #include "sysemu/kvm.h" #include "qemu/error-report.h" #include "qemu/sockets.h" #include "exec/ram_addr.h" +#include "migration/migration.h" #include <fcntl.h> #include <unistd.h> @@ -25,9 +27,16 @@ #define VHOST_MEMORY_MAX_NREGIONS 8 #define VHOST_USER_F_PROTOCOL_FEATURES 30 -#define VHOST_USER_PROTOCOL_FEATURE_MASK 0x1ULL -#define VHOST_USER_PROTOCOL_F_MQ 0 +enum VhostUserProtocolFeature { + VHOST_USER_PROTOCOL_F_MQ = 0, + VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, + VHOST_USER_PROTOCOL_F_RARP = 2, + + VHOST_USER_PROTOCOL_F_MAX +}; + +#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1) typedef enum VhostUserRequest { VHOST_USER_NONE = 0, @@ -49,6 +58,7 @@ typedef enum VhostUserRequest { VHOST_USER_SET_PROTOCOL_FEATURES = 16, VHOST_USER_GET_QUEUE_NUM = 17, VHOST_USER_SET_VRING_ENABLE = 18, + VHOST_USER_SEND_RARP = 19, VHOST_USER_MAX } VhostUserRequest; @@ -97,37 +107,6 @@ static bool ioeventfd_enabled(void) return kvm_enabled() && kvm_eventfds_enabled(); } -static unsigned long int ioctl_to_vhost_user_request[VHOST_USER_MAX] = { - -1, /* VHOST_USER_NONE */ - VHOST_GET_FEATURES, /* VHOST_USER_GET_FEATURES */ - VHOST_SET_FEATURES, /* VHOST_USER_SET_FEATURES */ - VHOST_SET_OWNER, /* VHOST_USER_SET_OWNER */ - VHOST_RESET_DEVICE, /* VHOST_USER_RESET_DEVICE */ - VHOST_SET_MEM_TABLE, /* VHOST_USER_SET_MEM_TABLE */ - VHOST_SET_LOG_BASE, /* VHOST_USER_SET_LOG_BASE */ - VHOST_SET_LOG_FD, /* VHOST_USER_SET_LOG_FD */ - VHOST_SET_VRING_NUM, /* VHOST_USER_SET_VRING_NUM */ - VHOST_SET_VRING_ADDR, /* VHOST_USER_SET_VRING_ADDR */ - VHOST_SET_VRING_BASE, /* VHOST_USER_SET_VRING_BASE */ - VHOST_GET_VRING_BASE, /* VHOST_USER_GET_VRING_BASE */ - VHOST_SET_VRING_KICK, /* VHOST_USER_SET_VRING_KICK */ - VHOST_SET_VRING_CALL, /* VHOST_USER_SET_VRING_CALL */ - VHOST_SET_VRING_ERR /* VHOST_USER_SET_VRING_ERR */ -}; - -static VhostUserRequest vhost_user_request_translate(unsigned long int request) -{ - VhostUserRequest idx; - - for (idx = 0; idx < VHOST_USER_MAX; idx++) { - if (ioctl_to_vhost_user_request[idx] == request) { - break; - } - } - - return (idx == VHOST_USER_MAX) ? VHOST_USER_NONE : idx; -} - static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) { CharDriverState *chr = dev->opaque; @@ -174,12 +153,35 @@ fail: return -1; } +static bool vhost_user_one_time_request(VhostUserRequest request) +{ + switch (request) { + case VHOST_USER_SET_OWNER: + case VHOST_USER_RESET_DEVICE: + case VHOST_USER_SET_MEM_TABLE: + case VHOST_USER_GET_QUEUE_NUM: + return true; + default: + return false; + } +} + +/* most non-init callers ignore the error */ static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, int *fds, int fd_num) { CharDriverState *chr = dev->opaque; int size = VHOST_USER_HDR_SIZE + msg->size; + /* + * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE, + * we just need send it once in the first time. For later such + * request, we just ignore it. + */ + if (vhost_user_one_time_request(msg->request) && dev->vq_index != 0) { + return 0; + } + if (fd_num) { qemu_chr_fe_set_msgfds(chr, fds, fd_num); } @@ -188,195 +190,321 @@ static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, 0 : -1; } -static bool vhost_user_one_time_request(VhostUserRequest request) +static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, + struct vhost_log *log) { - switch (request) { - case VHOST_USER_SET_OWNER: - case VHOST_USER_RESET_DEVICE: - case VHOST_USER_SET_MEM_TABLE: - case VHOST_USER_GET_QUEUE_NUM: - return true; - default: - return false; + int fds[VHOST_MEMORY_MAX_NREGIONS]; + size_t fd_num = 0; + bool shmfd = virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_LOG_SHMFD); + VhostUserMsg msg = { + .request = VHOST_USER_SET_LOG_BASE, + .flags = VHOST_USER_VERSION, + .u64 = base, + .size = sizeof(m.u64), + }; + + if (shmfd && log->fd != -1) { + fds[fd_num++] = log->fd; } + + vhost_user_write(dev, &msg, fds, fd_num); + + if (shmfd) { + msg.size = 0; + if (vhost_user_read(dev, &msg) < 0) { + return 0; + } + + if (msg.request != VHOST_USER_SET_LOG_BASE) { + error_report("Received unexpected msg type. " + "Expected %d received %d", + VHOST_USER_SET_LOG_BASE, msg.request); + return -1; + } + } + + return 0; } -static int vhost_user_call(struct vhost_dev *dev, unsigned long int request, - void *arg) +static int vhost_user_set_mem_table(struct vhost_dev *dev, + struct vhost_memory *mem) { - VhostUserMsg msg; - VhostUserRequest msg_request; - struct vhost_vring_file *file = 0; - int need_reply = 0; int fds[VHOST_MEMORY_MAX_NREGIONS]; int i, fd; size_t fd_num = 0; + VhostUserMsg msg = { + .request = VHOST_USER_SET_MEM_TABLE, + .flags = VHOST_USER_VERSION, + }; - assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); - - /* only translate vhost ioctl requests */ - if (request > VHOST_USER_MAX) { - msg_request = vhost_user_request_translate(request); - } else { - msg_request = request; + for (i = 0; i < dev->mem->nregions; ++i) { + struct vhost_memory_region *reg = dev->mem->regions + i; + ram_addr_t ram_addr; + + assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); + qemu_ram_addr_from_host((void *)(uintptr_t)reg->userspace_addr, + &ram_addr); + fd = qemu_get_ram_fd(ram_addr); + if (fd > 0) { + msg.memory.regions[fd_num].userspace_addr = reg->userspace_addr; + msg.memory.regions[fd_num].memory_size = reg->memory_size; + msg.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr; + msg.memory.regions[fd_num].mmap_offset = reg->userspace_addr - + (uintptr_t) qemu_get_ram_block_host_ptr(ram_addr); + assert(fd_num < VHOST_MEMORY_MAX_NREGIONS); + fds[fd_num++] = fd; + } } - /* - * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE, - * we just need send it once in the first time. For later such - * request, we just ignore it. - */ - if (vhost_user_one_time_request(msg_request) && dev->vq_index != 0) { - return 0; + msg.memory.nregions = fd_num; + + if (!fd_num) { + error_report("Failed initializing vhost-user memory map, " + "consider using -object memory-backend-file share=on"); + return -1; } - msg.request = msg_request; - msg.flags = VHOST_USER_VERSION; - msg.size = 0; + msg.size = sizeof(m.memory.nregions); + msg.size += sizeof(m.memory.padding); + msg.size += fd_num * sizeof(VhostUserMemoryRegion); - switch (msg_request) { - case VHOST_USER_GET_FEATURES: - case VHOST_USER_GET_PROTOCOL_FEATURES: - case VHOST_USER_GET_QUEUE_NUM: - need_reply = 1; - break; + vhost_user_write(dev, &msg, fds, fd_num); - case VHOST_USER_SET_FEATURES: - case VHOST_USER_SET_LOG_BASE: - case VHOST_USER_SET_PROTOCOL_FEATURES: - msg.u64 = *((__u64 *) arg); - msg.size = sizeof(m.u64); - break; + return 0; +} - case VHOST_USER_SET_OWNER: - case VHOST_USER_RESET_DEVICE: - break; +static int vhost_user_set_vring_addr(struct vhost_dev *dev, + struct vhost_vring_addr *addr) +{ + VhostUserMsg msg = { + .request = VHOST_USER_SET_VRING_ADDR, + .flags = VHOST_USER_VERSION, + .addr = *addr, + .size = sizeof(*addr), + }; - case VHOST_USER_SET_MEM_TABLE: - for (i = 0; i < dev->mem->nregions; ++i) { - struct vhost_memory_region *reg = dev->mem->regions + i; - ram_addr_t ram_addr; - - assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); - qemu_ram_addr_from_host((void *)(uintptr_t)reg->userspace_addr, &ram_addr); - fd = qemu_get_ram_fd(ram_addr); - if (fd > 0) { - msg.memory.regions[fd_num].userspace_addr = reg->userspace_addr; - msg.memory.regions[fd_num].memory_size = reg->memory_size; - msg.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr; - msg.memory.regions[fd_num].mmap_offset = reg->userspace_addr - - (uintptr_t) qemu_get_ram_block_host_ptr(ram_addr); - assert(fd_num < VHOST_MEMORY_MAX_NREGIONS); - fds[fd_num++] = fd; - } - } + vhost_user_write(dev, &msg, NULL, 0); - msg.memory.nregions = fd_num; + return 0; +} - if (!fd_num) { - error_report("Failed initializing vhost-user memory map, " - "consider using -object memory-backend-file share=on"); - return -1; - } +static int vhost_user_set_vring_endian(struct vhost_dev *dev, + struct vhost_vring_state *ring) +{ + error_report("vhost-user trying to send unhandled ioctl"); + return -1; +} - msg.size = sizeof(m.memory.nregions); - msg.size += sizeof(m.memory.padding); - msg.size += fd_num * sizeof(VhostUserMemoryRegion); - - break; - - case VHOST_USER_SET_LOG_FD: - fds[fd_num++] = *((int *) arg); - break; - - case VHOST_USER_SET_VRING_NUM: - case VHOST_USER_SET_VRING_BASE: - case VHOST_USER_SET_VRING_ENABLE: - memcpy(&msg.state, arg, sizeof(struct vhost_vring_state)); - msg.size = sizeof(m.state); - break; - - case VHOST_USER_GET_VRING_BASE: - memcpy(&msg.state, arg, sizeof(struct vhost_vring_state)); - msg.size = sizeof(m.state); - need_reply = 1; - break; - - case VHOST_USER_SET_VRING_ADDR: - memcpy(&msg.addr, arg, sizeof(struct vhost_vring_addr)); - msg.size = sizeof(m.addr); - break; - - case VHOST_USER_SET_VRING_KICK: - case VHOST_USER_SET_VRING_CALL: - case VHOST_USER_SET_VRING_ERR: - file = arg; - msg.u64 = file->index & VHOST_USER_VRING_IDX_MASK; - msg.size = sizeof(m.u64); - if (ioeventfd_enabled() && file->fd > 0) { - fds[fd_num++] = file->fd; - } else { - msg.u64 |= VHOST_USER_VRING_NOFD_MASK; - } - break; - default: - error_report("vhost-user trying to send unhandled ioctl"); +static int vhost_set_vring(struct vhost_dev *dev, + unsigned long int request, + struct vhost_vring_state *ring) +{ + VhostUserMsg msg = { + .request = request, + .flags = VHOST_USER_VERSION, + .state = *ring, + .size = sizeof(*ring), + }; + + vhost_user_write(dev, &msg, NULL, 0); + + return 0; +} + +static int vhost_user_set_vring_num(struct vhost_dev *dev, + struct vhost_vring_state *ring) +{ + return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring); +} + +static int vhost_user_set_vring_base(struct vhost_dev *dev, + struct vhost_vring_state *ring) +{ + return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring); +} + +static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) +{ + struct vhost_vring_state state = { + .index = dev->vq_index, + .num = enable, + }; + + if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ))) { return -1; - break; } - if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { + return vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state); +} + + +static int vhost_user_get_vring_base(struct vhost_dev *dev, + struct vhost_vring_state *ring) +{ + VhostUserMsg msg = { + .request = VHOST_USER_GET_VRING_BASE, + .flags = VHOST_USER_VERSION, + .state = *ring, + .size = sizeof(*ring), + }; + + vhost_user_write(dev, &msg, NULL, 0); + + if (vhost_user_read(dev, &msg) < 0) { return 0; } - if (need_reply) { - if (vhost_user_read(dev, &msg) < 0) { - return 0; - } + if (msg.request != VHOST_USER_GET_VRING_BASE) { + error_report("Received unexpected msg type. Expected %d received %d", + VHOST_USER_GET_VRING_BASE, msg.request); + return -1; + } - if (msg_request != msg.request) { - error_report("Received unexpected msg type." - " Expected %d received %d", msg_request, msg.request); - return -1; - } + if (msg.size != sizeof(m.state)) { + error_report("Received bad msg size."); + return -1; + } - switch (msg_request) { - case VHOST_USER_GET_FEATURES: - case VHOST_USER_GET_PROTOCOL_FEATURES: - case VHOST_USER_GET_QUEUE_NUM: - if (msg.size != sizeof(m.u64)) { - error_report("Received bad msg size."); - return -1; - } - *((__u64 *) arg) = msg.u64; - break; - case VHOST_USER_GET_VRING_BASE: - if (msg.size != sizeof(m.state)) { - error_report("Received bad msg size."); - return -1; - } - memcpy(arg, &msg.state, sizeof(struct vhost_vring_state)); - break; - default: - error_report("Received unexpected msg type."); - return -1; - break; - } + *ring = msg.state; + + return 0; +} + +static int vhost_set_vring_file(struct vhost_dev *dev, + VhostUserRequest request, + struct vhost_vring_file *file) +{ + int fds[VHOST_MEMORY_MAX_NREGIONS]; + size_t fd_num = 0; + VhostUserMsg msg = { + .request = request, + .flags = VHOST_USER_VERSION, + .u64 = file->index & VHOST_USER_VRING_IDX_MASK, + .size = sizeof(m.u64), + }; + + if (ioeventfd_enabled() && file->fd > 0) { + fds[fd_num++] = file->fd; + } else { + msg.u64 |= VHOST_USER_VRING_NOFD_MASK; + } + + vhost_user_write(dev, &msg, fds, fd_num); + + return 0; +} + +static int vhost_user_set_vring_kick(struct vhost_dev *dev, + struct vhost_vring_file *file) +{ + return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file); +} + +static int vhost_user_set_vring_call(struct vhost_dev *dev, + struct vhost_vring_file *file) +{ + return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file); +} + +static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64) +{ + VhostUserMsg msg = { + .request = request, + .flags = VHOST_USER_VERSION, + .u64 = u64, + .size = sizeof(m.u64), + }; + + vhost_user_write(dev, &msg, NULL, 0); + + return 0; +} + +static int vhost_user_set_features(struct vhost_dev *dev, + uint64_t features) +{ + return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features); +} + +static int vhost_user_set_protocol_features(struct vhost_dev *dev, + uint64_t features) +{ + return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features); +} + +static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) +{ + VhostUserMsg msg = { + .request = request, + .flags = VHOST_USER_VERSION, + }; + + if (vhost_user_one_time_request(request) && dev->vq_index != 0) { + return 0; + } + + vhost_user_write(dev, &msg, NULL, 0); + + if (vhost_user_read(dev, &msg) < 0) { + return 0; + } + + if (msg.request != request) { + error_report("Received unexpected msg type. Expected %d received %d", + request, msg.request); + return -1; } + if (msg.size != sizeof(m.u64)) { + error_report("Received bad msg size."); + return -1; + } + + *u64 = msg.u64; + + return 0; +} + +static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features) +{ + return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features); +} + +static int vhost_user_set_owner(struct vhost_dev *dev) +{ + VhostUserMsg msg = { + .request = VHOST_USER_SET_OWNER, + .flags = VHOST_USER_VERSION, + }; + + vhost_user_write(dev, &msg, NULL, 0); + + return 0; +} + +static int vhost_user_reset_device(struct vhost_dev *dev) +{ + VhostUserMsg msg = { + .request = VHOST_USER_RESET_DEVICE, + .flags = VHOST_USER_VERSION, + }; + + vhost_user_write(dev, &msg, NULL, 0); + return 0; } static int vhost_user_init(struct vhost_dev *dev, void *opaque) { - unsigned long long features; + uint64_t features; int err; assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); dev->opaque = opaque; - err = vhost_user_call(dev, VHOST_USER_GET_FEATURES, &features); + err = vhost_user_get_features(dev, &features); if (err < 0) { return err; } @@ -384,44 +512,37 @@ static int vhost_user_init(struct vhost_dev *dev, void *opaque) if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) { dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; - err = vhost_user_call(dev, VHOST_USER_GET_PROTOCOL_FEATURES, &features); + err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES, + &features); if (err < 0) { return err; } dev->protocol_features = features & VHOST_USER_PROTOCOL_FEATURE_MASK; - err = vhost_user_call(dev, VHOST_USER_SET_PROTOCOL_FEATURES, - &dev->protocol_features); + err = vhost_user_set_protocol_features(dev, dev->protocol_features); if (err < 0) { return err; } /* query the max queues we support if backend supports Multiple Queue */ if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) { - err = vhost_user_call(dev, VHOST_USER_GET_QUEUE_NUM, &dev->max_queues); + err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM, + &dev->max_queues); if (err < 0) { return err; } } } - return 0; -} - -static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) -{ - struct vhost_vring_state state = { - .index = dev->vq_index, - .num = enable, - }; - - assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); - - if (!(dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ))) { - return -1; + if (dev->migration_blocker == NULL && + !virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_LOG_SHMFD)) { + error_setg(&dev->migration_blocker, + "Migration disabled: vhost-user backend lacks " + "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature."); } - return vhost_user_call(dev, VHOST_USER_SET_VRING_ENABLE, &state); + return 0; } static int vhost_user_cleanup(struct vhost_dev *dev) @@ -440,11 +561,65 @@ static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx) return idx; } +static int vhost_user_memslots_limit(struct vhost_dev *dev) +{ + return VHOST_MEMORY_MAX_NREGIONS; +} + +static bool vhost_user_requires_shm_log(struct vhost_dev *dev) +{ + assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); + + return virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_LOG_SHMFD); +} + +static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr) +{ + VhostUserMsg msg = { 0 }; + int err; + + assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); + + /* If guest supports GUEST_ANNOUNCE do nothing */ + if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) { + return 0; + } + + /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */ + if (virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_RARP)) { + msg.request = VHOST_USER_SEND_RARP; + msg.flags = VHOST_USER_VERSION; + memcpy((char *)&msg.u64, mac_addr, 6); + msg.size = sizeof(m.u64); + + err = vhost_user_write(dev, &msg, NULL, 0); + return err; + } + return -1; +} + const VhostOps user_ops = { .backend_type = VHOST_BACKEND_TYPE_USER, - .vhost_call = vhost_user_call, .vhost_backend_init = vhost_user_init, .vhost_backend_cleanup = vhost_user_cleanup, - .vhost_backend_get_vq_index = vhost_user_get_vq_index, - .vhost_backend_set_vring_enable = vhost_user_set_vring_enable, + .vhost_backend_memslots_limit = vhost_user_memslots_limit, + .vhost_set_log_base = vhost_user_set_log_base, + .vhost_set_mem_table = vhost_user_set_mem_table, + .vhost_set_vring_addr = vhost_user_set_vring_addr, + .vhost_set_vring_endian = vhost_user_set_vring_endian, + .vhost_set_vring_num = vhost_user_set_vring_num, + .vhost_set_vring_base = vhost_user_set_vring_base, + .vhost_get_vring_base = vhost_user_get_vring_base, + .vhost_set_vring_kick = vhost_user_set_vring_kick, + .vhost_set_vring_call = vhost_user_set_vring_call, + .vhost_set_features = vhost_user_set_features, + .vhost_get_features = vhost_user_get_features, + .vhost_set_owner = vhost_user_set_owner, + .vhost_reset_device = vhost_user_reset_device, + .vhost_get_vq_index = vhost_user_get_vq_index, + .vhost_set_vring_enable = vhost_user_set_vring_enable, + .vhost_requires_shm_log = vhost_user_requires_shm_log, + .vhost_migration_done = vhost_user_migration_done, }; diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index c0ed5b2..de29968 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -18,6 +18,7 @@ #include "qemu/atomic.h" #include "qemu/range.h" #include "qemu/error-report.h" +#include "qemu/memfd.h" #include <linux/vhost.h> #include "exec/address-spaces.h" #include "hw/virtio/virtio-bus.h" @@ -25,6 +26,23 @@ #include "migration/migration.h" static struct vhost_log *vhost_log; +static struct vhost_log *vhost_log_shm; + +static unsigned int used_memslots; +static QLIST_HEAD(, vhost_dev) vhost_devices = + QLIST_HEAD_INITIALIZER(vhost_devices); + +bool vhost_has_free_slot(void) +{ + unsigned int slots_limit = ~0U; + struct vhost_dev *hdev; + + QLIST_FOREACH(hdev, &vhost_devices, entry) { + unsigned int r = hdev->vhost_ops->vhost_backend_memslots_limit(hdev); + slots_limit = MIN(slots_limit, r); + } + return slots_limit > used_memslots; +} static void vhost_dev_sync_region(struct vhost_dev *dev, MemoryRegionSection *section, @@ -286,25 +304,46 @@ static uint64_t vhost_get_log_size(struct vhost_dev *dev) } return log_size; } -static struct vhost_log *vhost_log_alloc(uint64_t size) + +static struct vhost_log *vhost_log_alloc(uint64_t size, bool share) { - struct vhost_log *log = g_malloc0(sizeof *log + size * sizeof(*(log->log))); + struct vhost_log *log; + uint64_t logsize = size * sizeof(*(log->log)); + int fd = -1; + + log = g_new0(struct vhost_log, 1); + if (share) { + log->log = qemu_memfd_alloc("vhost-log", logsize, + F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL, + &fd); + memset(log->log, 0, logsize); + } else { + log->log = g_malloc0(logsize); + } log->size = size; log->refcnt = 1; + log->fd = fd; return log; } -static struct vhost_log *vhost_log_get(uint64_t size) +static struct vhost_log *vhost_log_get(uint64_t size, bool share) { - if (!vhost_log || vhost_log->size != size) { - vhost_log = vhost_log_alloc(size); + struct vhost_log *log = share ? vhost_log_shm : vhost_log; + + if (!log || log->size != size) { + log = vhost_log_alloc(size, share); + if (share) { + vhost_log_shm = log; + } else { + vhost_log = log; + } } else { - ++vhost_log->refcnt; + ++log->refcnt; } - return vhost_log; + return log; } static void vhost_log_put(struct vhost_dev *dev, bool sync) @@ -321,20 +360,35 @@ static void vhost_log_put(struct vhost_dev *dev, bool sync) if (dev->log_size && sync) { vhost_log_sync_range(dev, 0, dev->log_size * VHOST_LOG_CHUNK - 1); } + if (vhost_log == log) { + g_free(log->log); vhost_log = NULL; + } else if (vhost_log_shm == log) { + qemu_memfd_free(log->log, log->size * sizeof(*(log->log)), + log->fd); + vhost_log_shm = NULL; } + g_free(log); } } -static inline void vhost_dev_log_resize(struct vhost_dev* dev, uint64_t size) +static bool vhost_dev_log_is_shared(struct vhost_dev *dev) { - struct vhost_log *log = vhost_log_get(size); + return dev->vhost_ops->vhost_requires_shm_log && + dev->vhost_ops->vhost_requires_shm_log(dev); +} + +static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size) +{ + struct vhost_log *log = vhost_log_get(size, vhost_dev_log_is_shared(dev)); uint64_t log_base = (uintptr_t)log->log; int r; - r = dev->vhost_ops->vhost_call(dev, VHOST_SET_LOG_BASE, &log_base); + /* inform backend of log switching, this must be done before + releasing the current log, to ensure no logging is lost */ + r = dev->vhost_ops->vhost_set_log_base(dev, log_base, log); assert(r >= 0); vhost_log_put(dev, true); dev->log = log; @@ -457,6 +511,7 @@ static void vhost_set_memory(MemoryListener *listener, dev->mem_changed_start_addr = MIN(dev->mem_changed_start_addr, start_addr); dev->mem_changed_end_addr = MAX(dev->mem_changed_end_addr, start_addr + size - 1); dev->memory_changed = true; + used_memslots = dev->mem->nregions; } static bool vhost_section(MemoryRegionSection *section) @@ -500,7 +555,7 @@ static void vhost_commit(MemoryListener *listener) } if (!dev->log_enabled) { - r = dev->vhost_ops->vhost_call(dev, VHOST_SET_MEM_TABLE, dev->mem); + r = dev->vhost_ops->vhost_set_mem_table(dev, dev->mem); assert(r >= 0); dev->memory_changed = false; return; @@ -513,7 +568,7 @@ static void vhost_commit(MemoryListener *listener) if (dev->log_size < log_size) { vhost_dev_log_resize(dev, log_size + VHOST_LOG_BUFFER); } - r = dev->vhost_ops->vhost_call(dev, VHOST_SET_MEM_TABLE, dev->mem); + r = dev->vhost_ops->vhost_set_mem_table(dev, dev->mem); assert(r >= 0); /* To log less, can only decrease log size after table update. */ if (dev->log_size > log_size + VHOST_LOG_BUFFER) { @@ -581,7 +636,7 @@ static int vhost_virtqueue_set_addr(struct vhost_dev *dev, .log_guest_addr = vq->used_phys, .flags = enable_log ? (1 << VHOST_VRING_F_LOG) : 0, }; - int r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_ADDR, &addr); + int r = dev->vhost_ops->vhost_set_vring_addr(dev, &addr); if (r < 0) { return -errno; } @@ -595,19 +650,20 @@ static int vhost_dev_set_features(struct vhost_dev *dev, bool enable_log) if (enable_log) { features |= 0x1ULL << VHOST_F_LOG_ALL; } - r = dev->vhost_ops->vhost_call(dev, VHOST_SET_FEATURES, &features); + r = dev->vhost_ops->vhost_set_features(dev, features); return r < 0 ? -errno : 0; } static int vhost_dev_set_log(struct vhost_dev *dev, bool enable_log) { - int r, t, i; + int r, t, i, idx; r = vhost_dev_set_features(dev, enable_log); if (r < 0) { goto err_features; } for (i = 0; i < dev->nvqs; ++i) { - r = vhost_virtqueue_set_addr(dev, dev->vqs + i, i, + idx = dev->vhost_ops->vhost_get_vq_index(dev, dev->vq_index + i); + r = vhost_virtqueue_set_addr(dev, dev->vqs + i, idx, enable_log); if (r < 0) { goto err_vq; @@ -616,7 +672,8 @@ static int vhost_dev_set_log(struct vhost_dev *dev, bool enable_log) return 0; err_vq: for (; i >= 0; --i) { - t = vhost_virtqueue_set_addr(dev, dev->vqs + i, i, + idx = dev->vhost_ops->vhost_get_vq_index(dev, dev->vq_index + i); + t = vhost_virtqueue_set_addr(dev, dev->vqs + i, idx, dev->log_enabled); assert(t >= 0); } @@ -700,7 +757,7 @@ static int vhost_virtqueue_set_vring_endian_legacy(struct vhost_dev *dev, .num = is_big_endian }; - if (!dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_ENDIAN, &s)) { + if (!dev->vhost_ops->vhost_set_vring_endian(dev, &s)) { return 0; } @@ -719,7 +776,7 @@ static int vhost_virtqueue_start(struct vhost_dev *dev, { hwaddr s, l, a; int r; - int vhost_vq_index = dev->vhost_ops->vhost_backend_get_vq_index(dev, idx); + int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx); struct vhost_vring_file file = { .index = vhost_vq_index }; @@ -730,13 +787,13 @@ static int vhost_virtqueue_start(struct vhost_dev *dev, vq->num = state.num = virtio_queue_get_num(vdev, idx); - r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_NUM, &state); + r = dev->vhost_ops->vhost_set_vring_num(dev, &state); if (r) { return -errno; } state.num = virtio_queue_get_last_avail_idx(vdev, idx); - r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_BASE, &state); + r = dev->vhost_ops->vhost_set_vring_base(dev, &state); if (r) { return -errno; } @@ -788,7 +845,7 @@ static int vhost_virtqueue_start(struct vhost_dev *dev, } file.fd = event_notifier_get_fd(virtio_queue_get_host_notifier(vvq)); - r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_KICK, &file); + r = dev->vhost_ops->vhost_set_vring_kick(dev, &file); if (r) { r = -errno; goto fail_kick; @@ -821,13 +878,13 @@ static void vhost_virtqueue_stop(struct vhost_dev *dev, struct vhost_virtqueue *vq, unsigned idx) { - int vhost_vq_index = dev->vhost_ops->vhost_backend_get_vq_index(dev, idx); + int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, idx); struct vhost_vring_state state = { .index = vhost_vq_index, }; int r; - r = dev->vhost_ops->vhost_call(dev, VHOST_GET_VRING_BASE, &state); + r = dev->vhost_ops->vhost_get_vring_base(dev, &state); if (r < 0) { fprintf(stderr, "vhost VQ %d ring restore failed: %d\n", idx, r); fflush(stderr); @@ -874,7 +931,7 @@ static void vhost_eventfd_del(MemoryListener *listener, static int vhost_virtqueue_init(struct vhost_dev *dev, struct vhost_virtqueue *vq, int n) { - int vhost_vq_index = dev->vhost_ops->vhost_backend_get_vq_index(dev, n); + int vhost_vq_index = dev->vhost_ops->vhost_get_vq_index(dev, n); struct vhost_vring_file file = { .index = vhost_vq_index, }; @@ -884,7 +941,7 @@ static int vhost_virtqueue_init(struct vhost_dev *dev, } file.fd = event_notifier_get_fd(&vq->masked_notifier); - r = dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_CALL, &file); + r = dev->vhost_ops->vhost_set_vring_call(dev, &file); if (r) { r = -errno; goto fail_call; @@ -906,6 +963,8 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, uint64_t features; int i, r; + hdev->migration_blocker = NULL; + if (vhost_set_backend_type(hdev, backend_type) < 0) { close((uintptr_t)opaque); return -1; @@ -916,12 +975,20 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, return -errno; } - r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_OWNER, NULL); + if (used_memslots > hdev->vhost_ops->vhost_backend_memslots_limit(hdev)) { + fprintf(stderr, "vhost backend memory slots limit is less" + " than current number of present memory slots\n"); + close((uintptr_t)opaque); + return -1; + } + QLIST_INSERT_HEAD(&vhost_devices, hdev, entry); + + r = hdev->vhost_ops->vhost_set_owner(hdev); if (r < 0) { goto fail; } - r = hdev->vhost_ops->vhost_call(hdev, VHOST_GET_FEATURES, &features); + r = hdev->vhost_ops->vhost_get_features(hdev, &features); if (r < 0) { goto fail; } @@ -949,12 +1016,21 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, .eventfd_del = vhost_eventfd_del, .priority = 10 }; - hdev->migration_blocker = NULL; - if (!(hdev->features & (0x1ULL << VHOST_F_LOG_ALL))) { - error_setg(&hdev->migration_blocker, - "Migration disabled: vhost lacks VHOST_F_LOG_ALL feature."); + + if (hdev->migration_blocker == NULL) { + if (!(hdev->features & (0x1ULL << VHOST_F_LOG_ALL))) { + error_setg(&hdev->migration_blocker, + "Migration disabled: vhost lacks VHOST_F_LOG_ALL feature."); + } else if (!qemu_memfd_check()) { + error_setg(&hdev->migration_blocker, + "Migration disabled: failed to allocate shared memory"); + } + } + + if (hdev->migration_blocker != NULL) { migrate_add_blocker(hdev->migration_blocker); } + hdev->mem = g_malloc0(offsetof(struct vhost_memory, regions)); hdev->n_mem_sections = 0; hdev->mem_sections = NULL; @@ -972,6 +1048,7 @@ fail_vq: fail: r = -errno; hdev->vhost_ops->vhost_backend_cleanup(hdev); + QLIST_REMOVE(hdev, entry); return r; } @@ -989,6 +1066,7 @@ void vhost_dev_cleanup(struct vhost_dev *hdev) g_free(hdev->mem); g_free(hdev->mem_sections); hdev->vhost_ops->vhost_backend_cleanup(hdev); + QLIST_REMOVE(hdev, entry); } /* Stop processing guest IO notifications in qemu. @@ -1074,8 +1152,8 @@ void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n, file.fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vvq)); } - file.index = hdev->vhost_ops->vhost_backend_get_vq_index(hdev, n); - r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_VRING_CALL, &file); + file.index = hdev->vhost_ops->vhost_get_vq_index(hdev, n); + r = hdev->vhost_ops->vhost_set_vring_call(hdev, &file); assert(r >= 0); } @@ -1117,7 +1195,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) if (r < 0) { goto fail_features; } - r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_MEM_TABLE, hdev->mem); + r = hdev->vhost_ops->vhost_set_mem_table(hdev, hdev->mem); if (r < 0) { r = -errno; goto fail_mem; @@ -1136,10 +1214,12 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) uint64_t log_base; hdev->log_size = vhost_get_log_size(hdev); - hdev->log = vhost_log_get(hdev->log_size); + hdev->log = vhost_log_get(hdev->log_size, + vhost_dev_log_is_shared(hdev)); log_base = (uintptr_t)hdev->log->log; - r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_LOG_BASE, - hdev->log_size ? &log_base : NULL); + r = hdev->vhost_ops->vhost_set_log_base(hdev, + hdev->log_size ? log_base : 0, + hdev->log); if (r < 0) { r = -errno; goto fail_log; |