diff options
41 files changed, 1739 insertions, 306 deletions
@@ -54,6 +54,7 @@ /module_block.h /scsi/qemu-pr-helper /vhost-user-scsi +/vhost-user-blk /fsdev/virtfs-proxy-helper *.tmp *.[1-9] diff --git a/MAINTAINERS b/MAINTAINERS index 4770f10..753e799 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -852,6 +852,7 @@ X86 Machines ------------ PC M: Michael S. Tsirkin <mst@redhat.com> +M: Marcel Apfelbaum <marcel@redhat.com> S: Supported F: include/hw/i386/ F: hw/i386/ @@ -334,6 +334,7 @@ dummy := $(call unnest-vars,, \ ivshmem-server-obj-y \ libvhost-user-obj-y \ vhost-user-scsi-obj-y \ + vhost-user-blk-obj-y \ qga-vss-dll-obj-y \ block-obj-y \ block-obj-m \ @@ -565,6 +566,8 @@ ivshmem-server$(EXESUF): $(ivshmem-server-obj-y) $(COMMON_LDADDS) endif vhost-user-scsi$(EXESUF): $(vhost-user-scsi-obj-y) libvhost-user.a $(call LINK, $^) +vhost-user-blk$(EXESUF): $(vhost-user-blk-obj-y) libvhost-user.a + $(call LINK, $^) module_block.h: $(SRC_PATH)/scripts/modules/module_block.py config-host.mak $(call quiet-command,$(PYTHON) $< $@ \ diff --git a/Makefile.objs b/Makefile.objs index c8b1bba..669d8d6 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -115,6 +115,7 @@ libvhost-user-obj-y = contrib/libvhost-user/ vhost-user-scsi.o-cflags := $(LIBISCSI_CFLAGS) vhost-user-scsi.o-libs := $(LIBISCSI_LIBS) vhost-user-scsi-obj-y = contrib/vhost-user-scsi/ +vhost-user-blk-obj-y = contrib/vhost-user-blk/ ###################################################################### trace-events-subdirs = diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index f290f48..071f4f5 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -812,6 +812,10 @@ static void kvm_mem_ioeventfd_del(MemoryListener *listener, if (r < 0) { abort(); } + + if (e->cleanup) { + e->cleanup(e); + } } static void kvm_io_ioeventfd_add(MemoryListener *listener, diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c index f409bd3..27cc597 100644 --- a/contrib/libvhost-user/libvhost-user.c +++ b/contrib/libvhost-user/libvhost-user.c @@ -84,6 +84,8 @@ vu_request_to_string(unsigned int req) REQ(VHOST_USER_SET_SLAVE_REQ_FD), REQ(VHOST_USER_IOTLB_MSG), REQ(VHOST_USER_SET_VRING_ENDIAN), + REQ(VHOST_USER_GET_CONFIG), + REQ(VHOST_USER_SET_CONFIG), REQ(VHOST_USER_MAX), }; #undef REQ @@ -798,6 +800,42 @@ vu_set_slave_req_fd(VuDev *dev, VhostUserMsg *vmsg) } static bool +vu_get_config(VuDev *dev, VhostUserMsg *vmsg) +{ + int ret = -1; + + if (dev->iface->get_config) { + ret = dev->iface->get_config(dev, vmsg->payload.config.region, + vmsg->payload.config.size); + } + + if (ret) { + /* resize to zero to indicate an error to master */ + vmsg->size = 0; + } + + return true; +} + +static bool +vu_set_config(VuDev *dev, VhostUserMsg *vmsg) +{ + int ret = -1; + + if (dev->iface->set_config) { + ret = dev->iface->set_config(dev, vmsg->payload.config.region, + vmsg->payload.config.offset, + vmsg->payload.config.size, + vmsg->payload.config.flags); + if (ret) { + vu_panic(dev, "Set virtio configuration space failed"); + } + } + + return false; +} + +static bool vu_process_message(VuDev *dev, VhostUserMsg *vmsg) { int do_reply = 0; @@ -862,6 +900,10 @@ vu_process_message(VuDev *dev, VhostUserMsg *vmsg) return vu_set_vring_enable_exec(dev, vmsg); case VHOST_USER_SET_SLAVE_REQ_FD: return vu_set_slave_req_fd(dev, vmsg); + case VHOST_USER_GET_CONFIG: + return vu_get_config(dev, vmsg); + case VHOST_USER_SET_CONFIG: + return vu_set_config(dev, vmsg); case VHOST_USER_NONE: break; default: diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h index 2f5864b..f8a730b 100644 --- a/contrib/libvhost-user/libvhost-user.h +++ b/contrib/libvhost-user/libvhost-user.h @@ -30,6 +30,16 @@ #define VHOST_MEMORY_MAX_NREGIONS 8 +typedef enum VhostSetConfigType { + VHOST_SET_CONFIG_TYPE_MASTER = 0, + VHOST_SET_CONFIG_TYPE_MIGRATION = 1, +} VhostSetConfigType; + +/* + * Maximum size of virtio device config space + */ +#define VHOST_USER_MAX_CONFIG_SIZE 256 + enum VhostUserProtocolFeature { VHOST_USER_PROTOCOL_F_MQ = 0, VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, @@ -69,6 +79,8 @@ typedef enum VhostUserRequest { VHOST_USER_SET_SLAVE_REQ_FD = 21, VHOST_USER_IOTLB_MSG = 22, VHOST_USER_SET_VRING_ENDIAN = 23, + VHOST_USER_GET_CONFIG = 24, + VHOST_USER_SET_CONFIG = 25, VHOST_USER_MAX } VhostUserRequest; @@ -90,6 +102,18 @@ typedef struct VhostUserLog { uint64_t mmap_offset; } VhostUserLog; +typedef struct VhostUserConfig { + uint32_t offset; + uint32_t size; + uint32_t flags; + uint8_t region[VHOST_USER_MAX_CONFIG_SIZE]; +} VhostUserConfig; + +static VhostUserConfig c __attribute__ ((unused)); +#define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \ + + sizeof(c.size) \ + + sizeof(c.flags)) + #if defined(_WIN32) # define VU_PACKED __attribute__((gcc_struct, packed)) #else @@ -112,6 +136,7 @@ typedef struct VhostUserMsg { struct vhost_vring_addr addr; VhostUserMemory memory; VhostUserLog log; + VhostUserConfig config; } payload; int fds[VHOST_MEMORY_MAX_NREGIONS]; @@ -140,6 +165,10 @@ typedef int (*vu_process_msg_cb) (VuDev *dev, VhostUserMsg *vmsg, int *do_reply); typedef void (*vu_queue_set_started_cb) (VuDev *dev, int qidx, bool started); typedef bool (*vu_queue_is_processed_in_order_cb) (VuDev *dev, int qidx); +typedef int (*vu_get_config_cb) (VuDev *dev, uint8_t *config, uint32_t len); +typedef int (*vu_set_config_cb) (VuDev *dev, const uint8_t *data, + uint32_t offset, uint32_t size, + uint32_t flags); typedef struct VuDevIface { /* called by VHOST_USER_GET_FEATURES to get the features bitmask */ @@ -162,6 +191,10 @@ typedef struct VuDevIface { * on unmanaged exit/crash. */ vu_queue_is_processed_in_order_cb queue_is_processed_in_order; + /* get the config space of the device */ + vu_get_config_cb get_config; + /* set the config space of the device */ + vu_set_config_cb set_config; } VuDevIface; typedef void (*vu_queue_handler_cb) (VuDev *dev, int qidx); diff --git a/contrib/vhost-user-blk/Makefile.objs b/contrib/vhost-user-blk/Makefile.objs new file mode 100644 index 0000000..72e2cdc --- /dev/null +++ b/contrib/vhost-user-blk/Makefile.objs @@ -0,0 +1 @@ +vhost-user-blk-obj-y = vhost-user-blk.o diff --git a/contrib/vhost-user-blk/vhost-user-blk.c b/contrib/vhost-user-blk/vhost-user-blk.c new file mode 100644 index 0000000..67dac81 --- /dev/null +++ b/contrib/vhost-user-blk/vhost-user-blk.c @@ -0,0 +1,545 @@ +/* + * vhost-user-blk sample application + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * + * Author: + * Changpeng Liu <changpeng.liu@intel.com> + * + * This work is based on the "vhost-user-scsi" sample and "virtio-blk" driver + * implementation by: + * Felipe Franciosi <felipe@nutanix.com> + * Anthony Liguori <aliguori@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 only. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "standard-headers/linux/virtio_blk.h" +#include "contrib/libvhost-user/libvhost-user-glib.h" +#include "contrib/libvhost-user/libvhost-user.h" + +#include <glib.h> + +struct virtio_blk_inhdr { + unsigned char status; +}; + +/* vhost user block device */ +typedef struct VubDev { + VugDev parent; + int blk_fd; + struct virtio_blk_config blkcfg; + char *blk_name; + GMainLoop *loop; +} VubDev; + +typedef struct VubReq { + VuVirtqElement *elem; + int64_t sector_num; + size_t size; + struct virtio_blk_inhdr *in; + struct virtio_blk_outhdr *out; + VubDev *vdev_blk; + struct VuVirtq *vq; +} VubReq; + +/* refer util/iov.c */ +static size_t vub_iov_size(const struct iovec *iov, + const unsigned int iov_cnt) +{ + size_t len; + unsigned int i; + + len = 0; + for (i = 0; i < iov_cnt; i++) { + len += iov[i].iov_len; + } + return len; +} + +static void vub_panic_cb(VuDev *vu_dev, const char *buf) +{ + VugDev *gdev; + VubDev *vdev_blk; + + assert(vu_dev); + + gdev = container_of(vu_dev, VugDev, parent); + vdev_blk = container_of(gdev, VubDev, parent); + if (buf) { + g_warning("vu_panic: %s", buf); + } + + g_main_loop_quit(vdev_blk->loop); +} + +static void vub_req_complete(VubReq *req) +{ + VugDev *gdev = &req->vdev_blk->parent; + VuDev *vu_dev = &gdev->parent; + + /* IO size with 1 extra status byte */ + vu_queue_push(vu_dev, req->vq, req->elem, + req->size + 1); + vu_queue_notify(vu_dev, req->vq); + + if (req->elem) { + free(req->elem); + } + + g_free(req); +} + +static int vub_open(const char *file_name, bool wce) +{ + int fd; + int flags = O_RDWR; + + if (!wce) { + flags |= O_DIRECT; + } + + fd = open(file_name, flags); + if (fd < 0) { + fprintf(stderr, "Cannot open file %s, %s\n", file_name, + strerror(errno)); + return -1; + } + + return fd; +} + +static ssize_t +vub_readv(VubReq *req, struct iovec *iov, uint32_t iovcnt) +{ + VubDev *vdev_blk = req->vdev_blk; + ssize_t rc; + + if (!iovcnt) { + fprintf(stderr, "Invalid Read IOV count\n"); + return -1; + } + + req->size = vub_iov_size(iov, iovcnt); + rc = preadv(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512); + if (rc < 0) { + fprintf(stderr, "%s, Sector %"PRIu64", Size %lu failed with %s\n", + vdev_blk->blk_name, req->sector_num, req->size, + strerror(errno)); + return -1; + } + + return rc; +} + +static ssize_t +vub_writev(VubReq *req, struct iovec *iov, uint32_t iovcnt) +{ + VubDev *vdev_blk = req->vdev_blk; + ssize_t rc; + + if (!iovcnt) { + fprintf(stderr, "Invalid Write IOV count\n"); + return -1; + } + + req->size = vub_iov_size(iov, iovcnt); + rc = pwritev(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512); + if (rc < 0) { + fprintf(stderr, "%s, Sector %"PRIu64", Size %lu failed with %s\n", + vdev_blk->blk_name, req->sector_num, req->size, + strerror(errno)); + return -1; + } + + return rc; +} + +static void +vub_flush(VubReq *req) +{ + VubDev *vdev_blk = req->vdev_blk; + + fdatasync(vdev_blk->blk_fd); +} + +static int vub_virtio_process_req(VubDev *vdev_blk, + VuVirtq *vq) +{ + VugDev *gdev = &vdev_blk->parent; + VuDev *vu_dev = &gdev->parent; + VuVirtqElement *elem; + uint32_t type; + unsigned in_num; + unsigned out_num; + VubReq *req; + + elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement) + sizeof(VubReq)); + if (!elem) { + return -1; + } + + /* refer to hw/block/virtio_blk.c */ + if (elem->out_num < 1 || elem->in_num < 1) { + fprintf(stderr, "virtio-blk request missing headers\n"); + free(elem); + return -1; + } + + req = g_new0(VubReq, 1); + req->vdev_blk = vdev_blk; + req->vq = vq; + req->elem = elem; + + in_num = elem->in_num; + out_num = elem->out_num; + + /* don't support VIRTIO_F_ANY_LAYOUT and virtio 1.0 only */ + if (elem->out_sg[0].iov_len < sizeof(struct virtio_blk_outhdr)) { + fprintf(stderr, "Invalid outhdr size\n"); + goto err; + } + req->out = (struct virtio_blk_outhdr *)elem->out_sg[0].iov_base; + out_num--; + + if (elem->in_sg[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) { + fprintf(stderr, "Invalid inhdr size\n"); + goto err; + } + req->in = (struct virtio_blk_inhdr *)elem->in_sg[in_num - 1].iov_base; + in_num--; + + type = le32toh(req->out->type); + switch (type & ~(VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_BARRIER)) { + case VIRTIO_BLK_T_IN: { + ssize_t ret = 0; + bool is_write = type & VIRTIO_BLK_T_OUT; + req->sector_num = le64toh(req->out->sector); + if (is_write) { + ret = vub_writev(req, &elem->out_sg[1], out_num); + } else { + ret = vub_readv(req, &elem->in_sg[0], in_num); + } + if (ret >= 0) { + req->in->status = VIRTIO_BLK_S_OK; + } else { + req->in->status = VIRTIO_BLK_S_IOERR; + } + vub_req_complete(req); + break; + } + case VIRTIO_BLK_T_FLUSH: { + vub_flush(req); + req->in->status = VIRTIO_BLK_S_OK; + vub_req_complete(req); + break; + } + case VIRTIO_BLK_T_GET_ID: { + size_t size = MIN(vub_iov_size(&elem->in_sg[0], in_num), + VIRTIO_BLK_ID_BYTES); + snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk"); + req->in->status = VIRTIO_BLK_S_OK; + req->size = elem->in_sg[0].iov_len; + vub_req_complete(req); + break; + } + default: { + req->in->status = VIRTIO_BLK_S_UNSUPP; + vub_req_complete(req); + break; + } + } + + return 0; + +err: + free(elem); + g_free(req); + return -1; +} + +static void vub_process_vq(VuDev *vu_dev, int idx) +{ + VugDev *gdev; + VubDev *vdev_blk; + VuVirtq *vq; + int ret; + + if ((idx < 0) || (idx >= VHOST_MAX_NR_VIRTQUEUE)) { + fprintf(stderr, "VQ Index out of range: %d\n", idx); + vub_panic_cb(vu_dev, NULL); + return; + } + + gdev = container_of(vu_dev, VugDev, parent); + vdev_blk = container_of(gdev, VubDev, parent); + assert(vdev_blk); + + vq = vu_get_queue(vu_dev, idx); + assert(vq); + + while (1) { + ret = vub_virtio_process_req(vdev_blk, vq); + if (ret) { + break; + } + } +} + +static void vub_queue_set_started(VuDev *vu_dev, int idx, bool started) +{ + VuVirtq *vq; + + assert(vu_dev); + + vq = vu_get_queue(vu_dev, idx); + vu_set_queue_handler(vu_dev, vq, started ? vub_process_vq : NULL); +} + +static uint64_t +vub_get_features(VuDev *dev) +{ + return 1ull << VIRTIO_BLK_F_SIZE_MAX | + 1ull << VIRTIO_BLK_F_SEG_MAX | + 1ull << VIRTIO_BLK_F_TOPOLOGY | + 1ull << VIRTIO_BLK_F_BLK_SIZE | + 1ull << VIRTIO_BLK_F_FLUSH | + 1ull << VIRTIO_BLK_F_CONFIG_WCE | + 1ull << VIRTIO_F_VERSION_1 | + 1ull << VHOST_USER_F_PROTOCOL_FEATURES; +} + +static int +vub_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len) +{ + VugDev *gdev; + VubDev *vdev_blk; + + gdev = container_of(vu_dev, VugDev, parent); + vdev_blk = container_of(gdev, VubDev, parent); + memcpy(config, &vdev_blk->blkcfg, len); + + return 0; +} + +static int +vub_set_config(VuDev *vu_dev, const uint8_t *data, + uint32_t offset, uint32_t size, uint32_t flags) +{ + VugDev *gdev; + VubDev *vdev_blk; + uint8_t wce; + int fd; + + /* don't support live migration */ + if (flags != VHOST_SET_CONFIG_TYPE_MASTER) { + return -1; + } + + gdev = container_of(vu_dev, VugDev, parent); + vdev_blk = container_of(gdev, VubDev, parent); + + if (offset != offsetof(struct virtio_blk_config, wce) || + size != 1) { + return -1; + } + + wce = *data; + if (wce == vdev_blk->blkcfg.wce) { + /* Do nothing as same with old configuration */ + return 0; + } + + vdev_blk->blkcfg.wce = wce; + fprintf(stdout, "Write Cache Policy Changed\n"); + if (vdev_blk->blk_fd >= 0) { + close(vdev_blk->blk_fd); + vdev_blk->blk_fd = -1; + } + + fd = vub_open(vdev_blk->blk_name, wce); + if (fd < 0) { + fprintf(stderr, "Error to open block device %s\n", vdev_blk->blk_name); + vdev_blk->blk_fd = -1; + return -1; + } + vdev_blk->blk_fd = fd; + + return 0; +} + +static const VuDevIface vub_iface = { + .get_features = vub_get_features, + .queue_set_started = vub_queue_set_started, + .get_config = vub_get_config, + .set_config = vub_set_config, +}; + +static int unix_sock_new(char *unix_fn) +{ + int sock; + struct sockaddr_un un; + size_t len; + + assert(unix_fn); + + sock = socket(AF_UNIX, SOCK_STREAM, 0); + if (sock <= 0) { + perror("socket"); + return -1; + } + + un.sun_family = AF_UNIX; + (void)snprintf(un.sun_path, sizeof(un.sun_path), "%s", unix_fn); + len = sizeof(un.sun_family) + strlen(un.sun_path); + + (void)unlink(unix_fn); + if (bind(sock, (struct sockaddr *)&un, len) < 0) { + perror("bind"); + goto fail; + } + + if (listen(sock, 1) < 0) { + perror("listen"); + goto fail; + } + + return sock; + +fail: + (void)close(sock); + + return -1; +} + +static void vub_free(struct VubDev *vdev_blk) +{ + if (!vdev_blk) { + return; + } + + g_main_loop_unref(vdev_blk->loop); + if (vdev_blk->blk_fd >= 0) { + close(vdev_blk->blk_fd); + } + g_free(vdev_blk); +} + +static uint32_t +vub_get_blocksize(int fd) +{ + uint32_t blocksize = 512; + +#if defined(__linux__) && defined(BLKSSZGET) + if (ioctl(fd, BLKSSZGET, &blocksize) == 0) { + return blocklen; + } +#endif + + return blocksize; +} + +static void +vub_initialize_config(int fd, struct virtio_blk_config *config) +{ + off64_t capacity; + + capacity = lseek64(fd, 0, SEEK_END); + config->capacity = capacity >> 9; + config->blk_size = vub_get_blocksize(fd); + config->size_max = 65536; + config->seg_max = 128 - 2; + config->min_io_size = 1; + config->opt_io_size = 1; + config->num_queues = 1; +} + +static VubDev * +vub_new(char *blk_file) +{ + VubDev *vdev_blk; + + vdev_blk = g_new0(VubDev, 1); + vdev_blk->loop = g_main_loop_new(NULL, FALSE); + vdev_blk->blk_fd = vub_open(blk_file, 0); + if (vdev_blk->blk_fd < 0) { + fprintf(stderr, "Error to open block device %s\n", blk_file); + vub_free(vdev_blk); + return NULL; + } + vdev_blk->blkcfg.wce = 0; + vdev_blk->blk_name = blk_file; + + /* fill virtio_blk_config with block parameters */ + vub_initialize_config(vdev_blk->blk_fd, &vdev_blk->blkcfg); + + return vdev_blk; +} + +int main(int argc, char **argv) +{ + int opt; + char *unix_socket = NULL; + char *blk_file = NULL; + int lsock = -1, csock = -1; + VubDev *vdev_blk = NULL; + + while ((opt = getopt(argc, argv, "b:s:h")) != -1) { + switch (opt) { + case 'b': + blk_file = g_strdup(optarg); + break; + case 's': + unix_socket = g_strdup(optarg); + break; + case 'h': + default: + printf("Usage: %s [-b block device or file, -s UNIX domain socket]" + " | [ -h ]\n", argv[0]); + return 0; + } + } + + if (!unix_socket || !blk_file) { + printf("Usage: %s [-b block device or file, -s UNIX domain socket] |" + " [ -h ]\n", argv[0]); + return -1; + } + + lsock = unix_sock_new(unix_socket); + if (lsock < 0) { + goto err; + } + + csock = accept(lsock, (void *)0, (void *)0); + if (csock < 0) { + fprintf(stderr, "Accept error %s\n", strerror(errno)); + goto err; + } + + vdev_blk = vub_new(blk_file); + if (!vdev_blk) { + goto err; + } + + vug_init(&vdev_blk->parent, csock, vub_panic_cb, &vub_iface); + + g_main_loop_run(vdev_blk->loop); + + vug_deinit(&vdev_blk->parent); + +err: + vub_free(vdev_blk); + if (csock >= 0) { + close(csock); + } + if (lsock >= 0) { + close(lsock); + } + g_free(unix_socket); + g_free(blk_file); + + return 0; +} diff --git a/default-configs/pci.mak b/default-configs/pci.mak index e514bde..49a0f28 100644 --- a/default-configs/pci.mak +++ b/default-configs/pci.mak @@ -43,3 +43,4 @@ CONFIG_VGA_PCI=y CONFIG_IVSHMEM_DEVICE=$(CONFIG_IVSHMEM) CONFIG_ROCKER=y CONFIG_VHOST_USER_SCSI=$(call land,$(CONFIG_VHOST_USER),$(CONFIG_LINUX)) +CONFIG_VHOST_USER_BLK=$(call land,$(CONFIG_VHOST_USER),$(CONFIG_LINUX)) diff --git a/default-configs/s390x-softmmu.mak b/default-configs/s390x-softmmu.mak index 444bf16..2f4bfe7 100644 --- a/default-configs/s390x-softmmu.mak +++ b/default-configs/s390x-softmmu.mak @@ -1,6 +1,7 @@ CONFIG_PCI=y CONFIG_VIRTIO_PCI=$(CONFIG_PCI) CONFIG_VHOST_USER_SCSI=$(call land,$(CONFIG_VHOST_USER),$(CONFIG_LINUX)) +CONFIG_VHOST_USER_BLK=$(call land,$(CONFIG_VHOST_USER),$(CONFIG_LINUX)) CONFIG_VIRTIO=y CONFIG_SCLPCONSOLE=y CONFIG_TERMINAL3270=y diff --git a/docs/interop/vhost-user.txt b/docs/interop/vhost-user.txt index d49444e..9fcf48d 100644 --- a/docs/interop/vhost-user.txt +++ b/docs/interop/vhost-user.txt @@ -119,6 +119,19 @@ Depending on the request type, payload can be: - 3: IOTLB invalidate - 4: IOTLB access fail + * Virtio device config space + ----------------------------------- + | offset | size | flags | payload | + ----------------------------------- + + Offset: a 32-bit offset of virtio device's configuration space + Size: a 32-bit configuration space access size in bytes + Flags: a 32-bit value: + - 0: Vhost master messages used for writeable fields + - 1: Vhost master messages used for live migration + Payload: Size bytes array holding the contents of the virtio + device's configuration space + In QEMU the vhost-user message is implemented with the following struct: typedef struct VhostUserMsg { @@ -132,6 +145,7 @@ typedef struct VhostUserMsg { VhostUserMemory memory; VhostUserLog log; struct vhost_iotlb_msg iotlb; + VhostUserConfig config; }; } QEMU_PACKED VhostUserMsg; @@ -214,8 +228,8 @@ Multiple queue is treated as a protocol extension, hence the slave has to implement protocol features first. The multiple queues feature is supported only when the protocol feature VHOST_USER_PROTOCOL_F_MQ (bit 0) is set. -The max number of queues the slave supports can be queried with message -VHOST_USER_GET_PROTOCOL_FEATURES. Master should stop when the number of +The max number of queue pairs the slave supports can be queried with message +VHOST_USER_GET_QUEUE_NUM. Master should stop when the number of requested queues is bigger than that. As all queues share one connection, the master uses a unique index for each @@ -623,6 +637,32 @@ Master message types and expect this message once (per VQ) during device configuration (ie. before the master starts the VQ). + * VHOST_USER_GET_CONFIG + + Id: 24 + Equivalent ioctl: N/A + Master payload: virtio device config space + Slave payload: virtio device config space + + Submitted by the vhost-user master to fetch the contents of the virtio + device configuration space, vhost-user slave's payload size MUST match + master's request, vhost-user slave uses zero length of payload to + indicate an error to vhost-user master. The vhost-user master may + cache the contents to avoid repeated VHOST_USER_GET_CONFIG calls. + +* VHOST_USER_SET_CONFIG + + Id: 25 + Equivalent ioctl: N/A + Master payload: virtio device config space + Slave payload: N/A + + Submitted by the vhost-user master when the Guest changes the virtio + device configuration space and also can be used for live migration + on the destination host. The vhost-user slave must check the flags + field, and slaves MUST NOT accept SET_CONFIG for read-only + configuration space fields unless the live migration bit is set. + Slave message types ------------------- @@ -641,6 +681,21 @@ Slave message types This request should be send only when VIRTIO_F_IOMMU_PLATFORM feature has been successfully negotiated. +* VHOST_USER_SLAVE_CONFIG_CHANGE_MSG + + Id: 2 + Equivalent ioctl: N/A + Slave payload: N/A + Master payload: N/A + + Vhost-user slave sends such messages to notify that the virtio device's + configuration space has changed, for those host devices which can support + such feature, host driver can send VHOST_USER_GET_CONFIG message to slave + to get the latest content. If VHOST_USER_PROTOCOL_F_REPLY_ACK is + negotiated, and slave set the VHOST_USER_NEED_REPLY flag, master must + respond with zero when operation is successfully completed, or non-zero + otherwise. + VHOST_USER_PROTOCOL_F_REPLY_ACK: ------------------------------- The original vhost-user specification only demands replies for certain diff --git a/hw/block/Makefile.objs b/hw/block/Makefile.objs index e0ed980..4c19a58 100644 --- a/hw/block/Makefile.objs +++ b/hw/block/Makefile.objs @@ -13,3 +13,6 @@ obj-$(CONFIG_SH4) += tc58128.o obj-$(CONFIG_VIRTIO) += virtio-blk.o obj-$(CONFIG_VIRTIO) += dataplane/ +ifeq ($(CONFIG_VIRTIO),y) +obj-$(CONFIG_VHOST_USER_BLK) += vhost-user-blk.o +endif diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c new file mode 100644 index 0000000..b53b4c9 --- /dev/null +++ b/hw/block/vhost-user-blk.c @@ -0,0 +1,359 @@ +/* + * vhost-user-blk host device + * + * Copyright(C) 2017 Intel Corporation. + * + * Authors: + * Changpeng Liu <changpeng.liu@intel.com> + * + * Largely based on the "vhost-user-scsi.c" and "vhost-scsi.c" implemented by: + * Felipe Franciosi <felipe@nutanix.com> + * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com> + * Nicholas Bellinger <nab@risingtidesystems.com> + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/typedefs.h" +#include "qemu/cutils.h" +#include "qom/object.h" +#include "hw/qdev-core.h" +#include "hw/virtio/vhost.h" +#include "hw/virtio/vhost-user-blk.h" +#include "hw/virtio/virtio.h" +#include "hw/virtio/virtio-bus.h" +#include "hw/virtio/virtio-access.h" + +static const int user_feature_bits[] = { + VIRTIO_BLK_F_SIZE_MAX, + VIRTIO_BLK_F_SEG_MAX, + VIRTIO_BLK_F_GEOMETRY, + VIRTIO_BLK_F_BLK_SIZE, + VIRTIO_BLK_F_TOPOLOGY, + VIRTIO_BLK_F_MQ, + VIRTIO_BLK_F_RO, + VIRTIO_BLK_F_FLUSH, + VIRTIO_BLK_F_CONFIG_WCE, + VIRTIO_F_VERSION_1, + VIRTIO_RING_F_INDIRECT_DESC, + VIRTIO_RING_F_EVENT_IDX, + VIRTIO_F_NOTIFY_ON_EMPTY, + VHOST_INVALID_FEATURE_BIT +}; + +static void vhost_user_blk_update_config(VirtIODevice *vdev, uint8_t *config) +{ + VHostUserBlk *s = VHOST_USER_BLK(vdev); + + memcpy(config, &s->blkcfg, sizeof(struct virtio_blk_config)); +} + +static void vhost_user_blk_set_config(VirtIODevice *vdev, const uint8_t *config) +{ + VHostUserBlk *s = VHOST_USER_BLK(vdev); + struct virtio_blk_config *blkcfg = (struct virtio_blk_config *)config; + int ret; + + if (blkcfg->wce == s->blkcfg.wce) { + return; + } + + ret = vhost_dev_set_config(&s->dev, &blkcfg->wce, + offsetof(struct virtio_blk_config, wce), + sizeof(blkcfg->wce), + VHOST_SET_CONFIG_TYPE_MASTER); + if (ret) { + error_report("set device config space failed"); + return; + } + + s->blkcfg.wce = blkcfg->wce; +} + +static int vhost_user_blk_handle_config_change(struct vhost_dev *dev) +{ + int ret; + struct virtio_blk_config blkcfg; + VHostUserBlk *s = VHOST_USER_BLK(dev->vdev); + + ret = vhost_dev_get_config(dev, (uint8_t *)&blkcfg, + sizeof(struct virtio_blk_config)); + if (ret < 0) { + error_report("get config space failed"); + return -1; + } + + /* valid for resize only */ + if (blkcfg.capacity != s->blkcfg.capacity) { + s->blkcfg.capacity = blkcfg.capacity; + memcpy(dev->vdev->config, &s->blkcfg, sizeof(struct virtio_blk_config)); + virtio_notify_config(dev->vdev); + } + + return 0; +} + +const VhostDevConfigOps blk_ops = { + .vhost_dev_config_notifier = vhost_user_blk_handle_config_change, +}; + +static void vhost_user_blk_start(VirtIODevice *vdev) +{ + VHostUserBlk *s = VHOST_USER_BLK(vdev); + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + int i, ret; + + if (!k->set_guest_notifiers) { + error_report("binding does not support guest notifiers"); + return; + } + + ret = vhost_dev_enable_notifiers(&s->dev, vdev); + if (ret < 0) { + error_report("Error enabling host notifiers: %d", -ret); + return; + } + + ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, true); + if (ret < 0) { + error_report("Error binding guest notifier: %d", -ret); + goto err_host_notifiers; + } + + s->dev.acked_features = vdev->guest_features; + ret = vhost_dev_start(&s->dev, vdev); + if (ret < 0) { + error_report("Error starting vhost: %d", -ret); + goto err_guest_notifiers; + } + + /* guest_notifier_mask/pending not used yet, so just unmask + * everything here. virtio-pci will do the right thing by + * enabling/disabling irqfd. + */ + for (i = 0; i < s->dev.nvqs; i++) { + vhost_virtqueue_mask(&s->dev, vdev, i, false); + } + + return; + +err_guest_notifiers: + k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false); +err_host_notifiers: + vhost_dev_disable_notifiers(&s->dev, vdev); +} + +static void vhost_user_blk_stop(VirtIODevice *vdev) +{ + VHostUserBlk *s = VHOST_USER_BLK(vdev); + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + int ret; + + if (!k->set_guest_notifiers) { + return; + } + + vhost_dev_stop(&s->dev, vdev); + + ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false); + if (ret < 0) { + error_report("vhost guest notifier cleanup failed: %d", ret); + return; + } + + vhost_dev_disable_notifiers(&s->dev, vdev); +} + +static void vhost_user_blk_set_status(VirtIODevice *vdev, uint8_t status) +{ + VHostUserBlk *s = VHOST_USER_BLK(vdev); + bool should_start = status & VIRTIO_CONFIG_S_DRIVER_OK; + + if (!vdev->vm_running) { + should_start = false; + } + + if (s->dev.started == should_start) { + return; + } + + if (should_start) { + vhost_user_blk_start(vdev); + } else { + vhost_user_blk_stop(vdev); + } + +} + +static uint64_t vhost_user_blk_get_features(VirtIODevice *vdev, + uint64_t features, + Error **errp) +{ + VHostUserBlk *s = VHOST_USER_BLK(vdev); + uint64_t get_features; + + /* Turn on pre-defined features */ + virtio_add_feature(&features, VIRTIO_BLK_F_SEG_MAX); + virtio_add_feature(&features, VIRTIO_BLK_F_GEOMETRY); + virtio_add_feature(&features, VIRTIO_BLK_F_TOPOLOGY); + virtio_add_feature(&features, VIRTIO_BLK_F_BLK_SIZE); + virtio_add_feature(&features, VIRTIO_BLK_F_FLUSH); + + if (s->config_wce) { + virtio_add_feature(&features, VIRTIO_BLK_F_CONFIG_WCE); + } + if (s->config_ro) { + virtio_add_feature(&features, VIRTIO_BLK_F_RO); + } + if (s->num_queues > 1) { + virtio_add_feature(&features, VIRTIO_BLK_F_MQ); + } + + get_features = vhost_get_features(&s->dev, user_feature_bits, features); + + return get_features; +} + +static void vhost_user_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ + +} + +static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserBlk *s = VHOST_USER_BLK(vdev); + int i, ret; + + if (!s->chardev.chr) { + error_setg(errp, "vhost-user-blk: chardev is mandatory"); + return; + } + + if (!s->num_queues || s->num_queues > VIRTIO_QUEUE_MAX) { + error_setg(errp, "vhost-user-blk: invalid number of IO queues"); + return; + } + + if (!s->queue_size) { + error_setg(errp, "vhost-user-blk: queue size must be non-zero"); + return; + } + + virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK, + sizeof(struct virtio_blk_config)); + + for (i = 0; i < s->num_queues; i++) { + virtio_add_queue(vdev, s->queue_size, + vhost_user_blk_handle_output); + } + + s->dev.nvqs = s->num_queues; + s->dev.vqs = g_new(struct vhost_virtqueue, s->dev.nvqs); + s->dev.vq_index = 0; + s->dev.backend_features = 0; + + ret = vhost_dev_init(&s->dev, &s->chardev, VHOST_BACKEND_TYPE_USER, 0); + if (ret < 0) { + error_setg(errp, "vhost-user-blk: vhost initialization failed: %s", + strerror(-ret)); + goto virtio_err; + } + + ret = vhost_dev_get_config(&s->dev, (uint8_t *)&s->blkcfg, + sizeof(struct virtio_blk_config)); + if (ret < 0) { + error_setg(errp, "vhost-user-blk: get block config failed"); + goto vhost_err; + } + + if (s->blkcfg.num_queues != s->num_queues) { + s->blkcfg.num_queues = s->num_queues; + } + + vhost_dev_set_config_notifier(&s->dev, &blk_ops); + + return; + +vhost_err: + vhost_dev_cleanup(&s->dev); +virtio_err: + g_free(s->dev.vqs); + virtio_cleanup(vdev); +} + +static void vhost_user_blk_device_unrealize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserBlk *s = VHOST_USER_BLK(dev); + + vhost_user_blk_set_status(vdev, 0); + vhost_dev_cleanup(&s->dev); + g_free(s->dev.vqs); + virtio_cleanup(vdev); +} + +static void vhost_user_blk_instance_init(Object *obj) +{ + VHostUserBlk *s = VHOST_USER_BLK(obj); + + device_add_bootindex_property(obj, &s->bootindex, "bootindex", + "/disk@0,0", DEVICE(obj), NULL); +} + +static const VMStateDescription vmstate_vhost_user_blk = { + .name = "vhost-user-blk", + .minimum_version_id = 1, + .version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_VIRTIO_DEVICE, + VMSTATE_END_OF_LIST() + }, +}; + +static Property vhost_user_blk_properties[] = { + DEFINE_PROP_CHR("chardev", VHostUserBlk, chardev), + DEFINE_PROP_UINT16("num-queues", VHostUserBlk, num_queues, 1), + DEFINE_PROP_UINT32("queue-size", VHostUserBlk, queue_size, 128), + DEFINE_PROP_BIT("config-wce", VHostUserBlk, config_wce, 0, true), + DEFINE_PROP_BIT("config-ro", VHostUserBlk, config_ro, 0, false), + DEFINE_PROP_END_OF_LIST(), +}; + +static void vhost_user_blk_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + + dc->props = vhost_user_blk_properties; + dc->vmsd = &vmstate_vhost_user_blk; + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + vdc->realize = vhost_user_blk_device_realize; + vdc->unrealize = vhost_user_blk_device_unrealize; + vdc->get_config = vhost_user_blk_update_config; + vdc->set_config = vhost_user_blk_set_config; + vdc->get_features = vhost_user_blk_get_features; + vdc->set_status = vhost_user_blk_set_status; +} + +static const TypeInfo vhost_user_blk_info = { + .name = TYPE_VHOST_USER_BLK, + .parent = TYPE_VIRTIO_DEVICE, + .instance_size = sizeof(VHostUserBlk), + .instance_init = vhost_user_blk_instance_init, + .class_init = vhost_user_blk_class_init, +}; + +static void virtio_register_types(void) +{ + type_register_static(&vhost_user_blk_info); +} + +type_init(virtio_register_types) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 18b939e..dc4b2b9 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -2394,7 +2394,7 @@ build_srat(GArray *table_data, BIOSLinker *linker, MachineState *machine) } mem_base = 1ULL << 32; mem_len = next_base - pcms->below_4g_mem_size; - next_base += (1ULL << 32) - pcms->below_4g_mem_size; + next_base = mem_base + mem_len; } numamem = acpi_data_push(table_data, sizeof *numamem); build_srat_memory(numamem, mem_base, mem_len, i - 1, @@ -2473,6 +2473,7 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker) AcpiDmarDeviceScope *scope = NULL; /* Root complex IOAPIC use one path[0] only */ size_t ioapic_scope_size = sizeof(*scope) + sizeof(scope->path[0]); + IntelIOMMUState *intel_iommu = INTEL_IOMMU_DEVICE(iommu); assert(iommu); if (iommu->intr_supported) { @@ -2480,7 +2481,7 @@ build_dmar_q35(GArray *table_data, BIOSLinker *linker) } dmar = acpi_data_push(table_data, sizeof(*dmar)); - dmar->host_address_width = VTD_HOST_ADDRESS_WIDTH - 1; + dmar->host_address_width = intel_iommu->aw_bits - 1; dmar->flags = dmar_flags; /* DMAR Remapping Hardware Unit Definition structure */ diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index eeaf0e0..63d46ff 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -1144,18 +1144,9 @@ static void amdvi_realize(DeviceState *dev, Error **err) AMDVIState *s = AMD_IOMMU_DEVICE(dev); X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev); MachineState *ms = MACHINE(qdev_get_machine()); - MachineClass *mc = MACHINE_GET_CLASS(ms); - PCMachineState *pcms = - PC_MACHINE(object_dynamic_cast(OBJECT(ms), TYPE_PC_MACHINE)); - PCIBus *bus; - - if (!pcms) { - error_setg(err, "Machine-type '%s' not supported by amd-iommu", - mc->name); - return; - } + PCMachineState *pcms = PC_MACHINE(ms); + PCIBus *bus = pcms->bus; - bus = pcms->bus; s->iotlb = g_hash_table_new_full(amdvi_uint64_hash, amdvi_uint64_equal, g_free, g_free); diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index fe15d3b..2e841cd 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -521,9 +521,9 @@ static inline dma_addr_t vtd_ce_get_slpt_base(VTDContextEntry *ce) return ce->lo & VTD_CONTEXT_ENTRY_SLPTPTR; } -static inline uint64_t vtd_get_slpte_addr(uint64_t slpte) +static inline uint64_t vtd_get_slpte_addr(uint64_t slpte, uint8_t aw) { - return slpte & VTD_SL_PT_BASE_ADDR_MASK; + return slpte & VTD_SL_PT_BASE_ADDR_MASK(aw); } /* Whether the pte indicates the address of the page frame */ @@ -608,35 +608,29 @@ static inline bool vtd_ce_type_check(X86IOMMUState *x86_iommu, return true; } -static inline uint64_t vtd_iova_limit(VTDContextEntry *ce) +static inline uint64_t vtd_iova_limit(VTDContextEntry *ce, uint8_t aw) { uint32_t ce_agaw = vtd_ce_get_agaw(ce); - return 1ULL << MIN(ce_agaw, VTD_MGAW); + return 1ULL << MIN(ce_agaw, aw); } /* Return true if IOVA passes range check, otherwise false. */ -static inline bool vtd_iova_range_check(uint64_t iova, VTDContextEntry *ce) +static inline bool vtd_iova_range_check(uint64_t iova, VTDContextEntry *ce, + uint8_t aw) { /* * Check if @iova is above 2^X-1, where X is the minimum of MGAW * in CAP_REG and AW in context-entry. */ - return !(iova & ~(vtd_iova_limit(ce) - 1)); -} - -static const uint64_t vtd_paging_entry_rsvd_field[] = { - [0] = ~0ULL, - /* For not large page */ - [1] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), - [2] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), - [3] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), - [4] = 0x880ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), - /* For large page */ - [5] = 0x800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), - [6] = 0x1ff800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), - [7] = 0x3ffff800ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), - [8] = 0x880ULL | ~(VTD_HAW_MASK | VTD_SL_IGN_COM), -}; + return !(iova & ~(vtd_iova_limit(ce, aw) - 1)); +} + +/* + * Rsvd field masks for spte: + * Index [1] to [4] 4k pages + * Index [5] to [8] large pages + */ +static uint64_t vtd_paging_entry_rsvd_field[9]; static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level) { @@ -676,7 +670,7 @@ static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num) */ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write, uint64_t *slptep, uint32_t *slpte_level, - bool *reads, bool *writes) + bool *reads, bool *writes, uint8_t aw_bits) { dma_addr_t addr = vtd_ce_get_slpt_base(ce); uint32_t level = vtd_ce_get_level(ce); @@ -684,7 +678,7 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write, uint64_t slpte; uint64_t access_right_check; - if (!vtd_iova_range_check(iova, ce)) { + if (!vtd_iova_range_check(iova, ce, aw_bits)) { trace_vtd_err_dmar_iova_overflow(iova); return -VTD_FR_ADDR_BEYOND_MGAW; } @@ -721,7 +715,7 @@ static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write, *slpte_level = level; return 0; } - addr = vtd_get_slpte_addr(slpte); + addr = vtd_get_slpte_addr(slpte, aw_bits); level--; } } @@ -739,11 +733,12 @@ typedef int (*vtd_page_walk_hook)(IOMMUTLBEntry *entry, void *private); * @read: whether parent level has read permission * @write: whether parent level has write permission * @notify_unmap: whether we should notify invalid entries + * @aw: maximum address width */ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start, uint64_t end, vtd_page_walk_hook hook_fn, - void *private, uint32_t level, - bool read, bool write, bool notify_unmap) + void *private, uint32_t level, bool read, + bool write, bool notify_unmap, uint8_t aw) { bool read_cur, write_cur, entry_valid; uint32_t offset; @@ -790,7 +785,7 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start, entry.target_as = &address_space_memory; entry.iova = iova & subpage_mask; /* NOTE: this is only meaningful if entry_valid == true */ - entry.translated_addr = vtd_get_slpte_addr(slpte); + entry.translated_addr = vtd_get_slpte_addr(slpte, aw); entry.addr_mask = ~subpage_mask; entry.perm = IOMMU_ACCESS_FLAG(read_cur, write_cur); if (!entry_valid && !notify_unmap) { @@ -810,10 +805,10 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start, trace_vtd_page_walk_skip_perm(iova, iova_next); goto next; } - ret = vtd_page_walk_level(vtd_get_slpte_addr(slpte), iova, + ret = vtd_page_walk_level(vtd_get_slpte_addr(slpte, aw), iova, MIN(iova_next, end), hook_fn, private, level - 1, read_cur, write_cur, - notify_unmap); + notify_unmap, aw); if (ret < 0) { return ret; } @@ -834,25 +829,26 @@ next: * @end: IOVA range end address (start <= addr < end) * @hook_fn: the hook that to be called for each detected area * @private: private data for the hook function + * @aw: maximum address width */ static int vtd_page_walk(VTDContextEntry *ce, uint64_t start, uint64_t end, vtd_page_walk_hook hook_fn, void *private, - bool notify_unmap) + bool notify_unmap, uint8_t aw) { dma_addr_t addr = vtd_ce_get_slpt_base(ce); uint32_t level = vtd_ce_get_level(ce); - if (!vtd_iova_range_check(start, ce)) { + if (!vtd_iova_range_check(start, ce, aw)) { return -VTD_FR_ADDR_BEYOND_MGAW; } - if (!vtd_iova_range_check(end, ce)) { + if (!vtd_iova_range_check(end, ce, aw)) { /* Fix end so that it reaches the maximum */ - end = vtd_iova_limit(ce); + end = vtd_iova_limit(ce, aw); } return vtd_page_walk_level(addr, start, end, hook_fn, private, - level, true, true, notify_unmap); + level, true, true, notify_unmap, aw); } /* Map a device to its corresponding domain (context-entry) */ @@ -874,7 +870,7 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num, return -VTD_FR_ROOT_ENTRY_P; } - if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD)) { + if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD(s->aw_bits))) { trace_vtd_re_invalid(re.rsvd, re.val); return -VTD_FR_ROOT_ENTRY_RSVD; } @@ -891,7 +887,7 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num, } if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) || - (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO)) { + (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO(s->aw_bits))) { trace_vtd_ce_invalid(ce->hi, ce->lo); return -VTD_FR_CONTEXT_ENTRY_RSVD; } @@ -1173,7 +1169,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, } ret_fr = vtd_iova_to_slpte(&ce, addr, is_write, &slpte, &level, - &reads, &writes); + &reads, &writes, s->aw_bits); if (ret_fr) { ret_fr = -ret_fr; if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) { @@ -1190,7 +1186,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, access_flags, level); out: entry->iova = addr & page_mask; - entry->translated_addr = vtd_get_slpte_addr(slpte) & page_mask; + entry->translated_addr = vtd_get_slpte_addr(slpte, s->aw_bits) & page_mask; entry->addr_mask = ~page_mask; entry->perm = access_flags; return true; @@ -1207,7 +1203,7 @@ static void vtd_root_table_setup(IntelIOMMUState *s) { s->root = vtd_get_quad_raw(s, DMAR_RTADDR_REG); s->root_extended = s->root & VTD_RTADDR_RTT; - s->root &= VTD_RTADDR_ADDR_MASK; + s->root &= VTD_RTADDR_ADDR_MASK(s->aw_bits); trace_vtd_reg_dmar_root(s->root, s->root_extended); } @@ -1223,7 +1219,7 @@ static void vtd_interrupt_remap_table_setup(IntelIOMMUState *s) uint64_t value = 0; value = vtd_get_quad_raw(s, DMAR_IRTA_REG); s->intr_size = 1UL << ((value & VTD_IRTA_SIZE_MASK) + 1); - s->intr_root = value & VTD_IRTA_ADDR_MASK; + s->intr_root = value & VTD_IRTA_ADDR_MASK(s->aw_bits); s->intr_eime = value & VTD_IRTA_EIME; /* Notify global invalidation */ @@ -1399,7 +1395,7 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s, if (!ret && domain_id == VTD_CONTEXT_ENTRY_DID(ce.hi)) { vtd_page_walk(&ce, addr, addr + (1 << am) * VTD_PAGE_SIZE, vtd_page_invalidate_notify_hook, - (void *)&vtd_as->iommu, true); + (void *)&vtd_as->iommu, true, s->aw_bits); } } } @@ -1479,7 +1475,7 @@ static void vtd_handle_gcmd_qie(IntelIOMMUState *s, bool en) trace_vtd_inv_qi_enable(en); if (en) { - s->iq = iqa_val & VTD_IQA_IQA_MASK; + s->iq = iqa_val & VTD_IQA_IQA_MASK(s->aw_bits); /* 2^(x+8) entries */ s->iq_size = 1UL << ((iqa_val & VTD_IQA_QS) + 8); s->qi_enabled = true; @@ -2410,6 +2406,8 @@ static Property vtd_properties[] = { DEFINE_PROP_ON_OFF_AUTO("eim", IntelIOMMUState, intr_eim, ON_OFF_AUTO_AUTO), DEFINE_PROP_BOOL("x-buggy-eim", IntelIOMMUState, buggy_eim, false), + DEFINE_PROP_UINT8("x-aw-bits", IntelIOMMUState, aw_bits, + VTD_HOST_ADDRESS_WIDTH), DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE), DEFINE_PROP_END_OF_LIST(), }; @@ -2765,6 +2763,7 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n) hwaddr size; hwaddr start = n->start; hwaddr end = n->end; + IntelIOMMUState *s = as->iommu_state; /* * Note: all the codes in this function has a assumption that IOVA @@ -2772,12 +2771,12 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n) * VT-d spec), otherwise we need to consider overflow of 64 bits. */ - if (end > VTD_ADDRESS_SIZE) { + if (end > VTD_ADDRESS_SIZE(s->aw_bits)) { /* * Don't need to unmap regions that is bigger than the whole * VT-d supported address space size */ - end = VTD_ADDRESS_SIZE; + end = VTD_ADDRESS_SIZE(s->aw_bits); } assert(start <= end); @@ -2789,9 +2788,9 @@ static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n) * suite the minimum available mask. */ int n = 64 - clz64(size); - if (n > VTD_MGAW) { + if (n > s->aw_bits) { /* should not happen, but in case it happens, limit it */ - n = VTD_MGAW; + n = s->aw_bits; } size = 1ULL << n; } @@ -2851,7 +2850,8 @@ static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n) PCI_FUNC(vtd_as->devfn), VTD_CONTEXT_ENTRY_DID(ce.hi), ce.hi, ce.lo); - vtd_page_walk(&ce, 0, ~0ULL, vtd_replay_hook, (void *)n, false); + vtd_page_walk(&ce, 0, ~0ULL, vtd_replay_hook, (void *)n, false, + s->aw_bits); } else { trace_vtd_replay_ce_invalid(bus_n, PCI_SLOT(vtd_as->devfn), PCI_FUNC(vtd_as->devfn)); @@ -2882,10 +2882,27 @@ static void vtd_init(IntelIOMMUState *s) s->qi_enabled = false; s->iq_last_desc_type = VTD_INV_DESC_NONE; s->next_frcd_reg = 0; - s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | VTD_CAP_MGAW | - VTD_CAP_SAGAW | VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS; + s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | + VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS | + VTD_CAP_SAGAW_39bit | VTD_CAP_MGAW(s->aw_bits); + if (s->aw_bits == VTD_HOST_AW_48BIT) { + s->cap |= VTD_CAP_SAGAW_48bit; + } s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO; + /* + * Rsvd field masks for spte + */ + vtd_paging_entry_rsvd_field[0] = ~0ULL; + vtd_paging_entry_rsvd_field[1] = VTD_SPTE_PAGE_L1_RSVD_MASK(s->aw_bits); + vtd_paging_entry_rsvd_field[2] = VTD_SPTE_PAGE_L2_RSVD_MASK(s->aw_bits); + vtd_paging_entry_rsvd_field[3] = VTD_SPTE_PAGE_L3_RSVD_MASK(s->aw_bits); + vtd_paging_entry_rsvd_field[4] = VTD_SPTE_PAGE_L4_RSVD_MASK(s->aw_bits); + vtd_paging_entry_rsvd_field[5] = VTD_SPTE_LPAGE_L1_RSVD_MASK(s->aw_bits); + vtd_paging_entry_rsvd_field[6] = VTD_SPTE_LPAGE_L2_RSVD_MASK(s->aw_bits); + vtd_paging_entry_rsvd_field[7] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->aw_bits); + vtd_paging_entry_rsvd_field[8] = VTD_SPTE_LPAGE_L4_RSVD_MASK(s->aw_bits); + if (x86_iommu->intr_supported) { s->ecap |= VTD_ECAP_IR | VTD_ECAP_MHMV; if (s->intr_eim == ON_OFF_AUTO_ON) { @@ -3021,26 +3038,25 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp) } } + /* Currently only address widths supported are 39 and 48 bits */ + if ((s->aw_bits != VTD_HOST_AW_39BIT) && + (s->aw_bits != VTD_HOST_AW_48BIT)) { + error_setg(errp, "Supported values for x-aw-bits are: %d, %d", + VTD_HOST_AW_39BIT, VTD_HOST_AW_48BIT); + return false; + } + return true; } static void vtd_realize(DeviceState *dev, Error **errp) { MachineState *ms = MACHINE(qdev_get_machine()); - MachineClass *mc = MACHINE_GET_CLASS(ms); - PCMachineState *pcms = - PC_MACHINE(object_dynamic_cast(OBJECT(ms), TYPE_PC_MACHINE)); - PCIBus *bus; + PCMachineState *pcms = PC_MACHINE(ms); + PCIBus *bus = pcms->bus; IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev); X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev); - if (!pcms) { - error_setg(errp, "Machine-type '%s' not supported by intel-iommu", - mc->name); - return; - } - - bus = pcms->bus; x86_iommu->type = TYPE_INTEL; if (!vtd_decide_config(s, errp)) { diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index 0e73a65..d084099 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -131,7 +131,7 @@ #define VTD_TLB_DID(val) (((val) >> 32) & VTD_DOMAIN_ID_MASK) /* IVA_REG */ -#define VTD_IVA_ADDR(val) ((val) & ~0xfffULL & ((1ULL << VTD_MGAW) - 1)) +#define VTD_IVA_ADDR(val) ((val) & ~0xfffULL) #define VTD_IVA_AM(val) ((val) & 0x3fULL) /* GCMD_REG */ @@ -172,10 +172,10 @@ /* RTADDR_REG */ #define VTD_RTADDR_RTT (1ULL << 11) -#define VTD_RTADDR_ADDR_MASK (VTD_HAW_MASK ^ 0xfffULL) +#define VTD_RTADDR_ADDR_MASK(aw) (VTD_HAW_MASK(aw) ^ 0xfffULL) /* IRTA_REG */ -#define VTD_IRTA_ADDR_MASK (VTD_HAW_MASK ^ 0xfffULL) +#define VTD_IRTA_ADDR_MASK(aw) (VTD_HAW_MASK(aw) ^ 0xfffULL) #define VTD_IRTA_EIME (1ULL << 11) #define VTD_IRTA_SIZE_MASK (0xfULL) @@ -197,9 +197,8 @@ #define VTD_DOMAIN_ID_SHIFT 16 /* 16-bit domain id for 64K domains */ #define VTD_DOMAIN_ID_MASK ((1UL << VTD_DOMAIN_ID_SHIFT) - 1) #define VTD_CAP_ND (((VTD_DOMAIN_ID_SHIFT - 4) / 2) & 7ULL) -#define VTD_MGAW 39 /* Maximum Guest Address Width */ -#define VTD_ADDRESS_SIZE (1ULL << VTD_MGAW) -#define VTD_CAP_MGAW (((VTD_MGAW - 1) & 0x3fULL) << 16) +#define VTD_ADDRESS_SIZE(aw) (1ULL << (aw)) +#define VTD_CAP_MGAW(aw) ((((aw) - 1) & 0x3fULL) << 16) #define VTD_MAMV 18ULL #define VTD_CAP_MAMV (VTD_MAMV << 48) #define VTD_CAP_PSI (1ULL << 39) @@ -213,13 +212,12 @@ #define VTD_CAP_SAGAW_39bit (0x2ULL << VTD_CAP_SAGAW_SHIFT) /* 48-bit AGAW, 4-level page-table */ #define VTD_CAP_SAGAW_48bit (0x4ULL << VTD_CAP_SAGAW_SHIFT) -#define VTD_CAP_SAGAW VTD_CAP_SAGAW_39bit /* IQT_REG */ #define VTD_IQT_QT(val) (((val) >> 4) & 0x7fffULL) /* IQA_REG */ -#define VTD_IQA_IQA_MASK (VTD_HAW_MASK ^ 0xfffULL) +#define VTD_IQA_IQA_MASK(aw) (VTD_HAW_MASK(aw) ^ 0xfffULL) #define VTD_IQA_QS 0x7ULL /* IQH_REG */ @@ -252,7 +250,7 @@ #define VTD_FRCD_SID_MASK 0xffffULL #define VTD_FRCD_SID(val) ((val) & VTD_FRCD_SID_MASK) /* For the low 64-bit of 128-bit */ -#define VTD_FRCD_FI(val) ((val) & (((1ULL << VTD_MGAW) - 1) ^ 0xfffULL)) +#define VTD_FRCD_FI(val) ((val) & ~0xfffULL) /* DMA Remapping Fault Conditions */ typedef enum VTDFaultReason { @@ -360,8 +358,7 @@ typedef union VTDInvDesc VTDInvDesc; #define VTD_INV_DESC_IOTLB_DOMAIN (2ULL << 4) #define VTD_INV_DESC_IOTLB_PAGE (3ULL << 4) #define VTD_INV_DESC_IOTLB_DID(val) (((val) >> 16) & VTD_DOMAIN_ID_MASK) -#define VTD_INV_DESC_IOTLB_ADDR(val) ((val) & ~0xfffULL & \ - ((1ULL << VTD_MGAW) - 1)) +#define VTD_INV_DESC_IOTLB_ADDR(val) ((val) & ~0xfffULL) #define VTD_INV_DESC_IOTLB_AM(val) ((val) & 0x3fULL) #define VTD_INV_DESC_IOTLB_RSVD_LO 0xffffffff0000ff00ULL #define VTD_INV_DESC_IOTLB_RSVD_HI 0xf80ULL @@ -373,6 +370,24 @@ typedef union VTDInvDesc VTDInvDesc; #define VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI 0xffeULL #define VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO 0xffff0000ffe0fff8 +/* Rsvd field masks for spte */ +#define VTD_SPTE_PAGE_L1_RSVD_MASK(aw) \ + (0x800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM)) +#define VTD_SPTE_PAGE_L2_RSVD_MASK(aw) \ + (0x800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM)) +#define VTD_SPTE_PAGE_L3_RSVD_MASK(aw) \ + (0x800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM)) +#define VTD_SPTE_PAGE_L4_RSVD_MASK(aw) \ + (0x880ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM)) +#define VTD_SPTE_LPAGE_L1_RSVD_MASK(aw) \ + (0x800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM)) +#define VTD_SPTE_LPAGE_L2_RSVD_MASK(aw) \ + (0x1ff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM)) +#define VTD_SPTE_LPAGE_L3_RSVD_MASK(aw) \ + (0x3ffff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM)) +#define VTD_SPTE_LPAGE_L4_RSVD_MASK(aw) \ + (0x880ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM)) + /* Information about page-selective IOTLB invalidate */ struct VTDIOTLBPageInvInfo { uint16_t domain_id; @@ -403,7 +418,7 @@ typedef struct VTDRootEntry VTDRootEntry; #define VTD_ROOT_ENTRY_CTP (~0xfffULL) #define VTD_ROOT_ENTRY_NR (VTD_PAGE_SIZE / sizeof(VTDRootEntry)) -#define VTD_ROOT_ENTRY_RSVD (0xffeULL | ~VTD_HAW_MASK) +#define VTD_ROOT_ENTRY_RSVD(aw) (0xffeULL | ~VTD_HAW_MASK(aw)) /* Masks for struct VTDContextEntry */ /* lo */ @@ -415,7 +430,7 @@ typedef struct VTDRootEntry VTDRootEntry; #define VTD_CONTEXT_TT_PASS_THROUGH (2ULL << 2) /* Second Level Page Translation Pointer*/ #define VTD_CONTEXT_ENTRY_SLPTPTR (~0xfffULL) -#define VTD_CONTEXT_ENTRY_RSVD_LO (0xff0ULL | ~VTD_HAW_MASK) +#define VTD_CONTEXT_ENTRY_RSVD_LO(aw) (0xff0ULL | ~VTD_HAW_MASK(aw)) /* hi */ #define VTD_CONTEXT_ENTRY_AW 7ULL /* Adjusted guest-address-width */ #define VTD_CONTEXT_ENTRY_DID(val) (((val) >> 8) & VTD_DOMAIN_ID_MASK) @@ -439,7 +454,7 @@ typedef struct VTDRootEntry VTDRootEntry; #define VTD_SL_RW_MASK 3ULL #define VTD_SL_R 1ULL #define VTD_SL_W (1ULL << 1) -#define VTD_SL_PT_BASE_ADDR_MASK (~(VTD_PAGE_SIZE - 1) & VTD_HAW_MASK) +#define VTD_SL_PT_BASE_ADDR_MASK(aw) (~(VTD_PAGE_SIZE - 1) & VTD_HAW_MASK(aw)) #define VTD_SL_IGN_COM 0xbff0000000000000ULL #endif diff --git a/hw/i386/x86-iommu.c b/hw/i386/x86-iommu.c index 293caf8..8a01a2d 100644 --- a/hw/i386/x86-iommu.c +++ b/hw/i386/x86-iommu.c @@ -21,6 +21,8 @@ #include "hw/sysbus.h" #include "hw/boards.h" #include "hw/i386/x86-iommu.h" +#include "hw/i386/pc.h" +#include "qapi/error.h" #include "qemu/error-report.h" #include "trace.h" @@ -80,7 +82,18 @@ static void x86_iommu_realize(DeviceState *dev, Error **errp) { X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev); X86IOMMUClass *x86_class = X86_IOMMU_GET_CLASS(dev); + MachineState *ms = MACHINE(qdev_get_machine()); + MachineClass *mc = MACHINE_GET_CLASS(ms); + PCMachineState *pcms = + PC_MACHINE(object_dynamic_cast(OBJECT(ms), TYPE_PC_MACHINE)); QLIST_INIT(&x86_iommu->iec_notifiers); + + if (!pcms || !pcms->bus) { + error_setg(errp, "Machine-type '%s' not supported by IOMMU", + mc->name); + return; + } + if (x86_class->realize) { x86_class->realize(dev, errp); } diff --git a/hw/pci-bridge/gen_pcie_root_port.c b/hw/pci-bridge/gen_pcie_root_port.c index ad4e6aa..0e2f2e8 100644 --- a/hw/pci-bridge/gen_pcie_root_port.c +++ b/hw/pci-bridge/gen_pcie_root_port.c @@ -74,8 +74,13 @@ static void gen_rp_realize(DeviceState *dev, Error **errp) PCIDevice *d = PCI_DEVICE(dev); GenPCIERootPort *grp = GEN_PCIE_ROOT_PORT(d); PCIERootPortClass *rpc = PCIE_ROOT_PORT_GET_CLASS(d); + Error *local_err = NULL; - rpc->parent_realize(dev, errp); + rpc->parent_realize(dev, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } int rc = pci_bridge_qemu_reserve_cap_init(d, 0, grp->bus_reserve, grp->io_reserve, grp->mem_reserve, grp->pref32_reserve, diff --git a/hw/pci/shpc.c b/hw/pci/shpc.c index 69fc14b..a8462d4 100644 --- a/hw/pci/shpc.c +++ b/hw/pci/shpc.c @@ -1,6 +1,7 @@ #include "qemu/osdep.h" #include "qapi/error.h" #include "qemu-common.h" +#include "qemu/host-utils.h" #include "qemu/range.h" #include "qemu/error-report.h" #include "hw/pci/shpc.h" @@ -122,16 +123,6 @@ #define SHPC_PCI_TO_IDX(pci_slot) ((pci_slot) - 1) #define SHPC_IDX_TO_PHYSICAL(slot) ((slot) + 1) -static int roundup_pow_of_two(int x) -{ - x |= (x >> 1); - x |= (x >> 2); - x |= (x >> 4); - x |= (x >> 8); - x |= (x >> 16); - return x + 1; -} - static uint16_t shpc_get_status(SHPCDevice *shpc, int slot, uint16_t msk) { uint8_t *status = shpc->config + SHPC_SLOT_STATUS(slot); @@ -656,7 +647,7 @@ int shpc_init(PCIDevice *d, PCIBus *sec_bus, MemoryRegion *bar, int shpc_bar_size(PCIDevice *d) { - return roundup_pow_of_two(SHPC_SLOT_REG(SHPC_MAX_SLOTS)); + return pow2roundup32(SHPC_SLOT_REG(SHPC_MAX_SLOTS)); } void shpc_cleanup(PCIDevice *d, MemoryRegion *bar) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 093675e..6eb9798 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -26,6 +26,11 @@ #define VHOST_MEMORY_MAX_NREGIONS 8 #define VHOST_USER_F_PROTOCOL_FEATURES 30 +/* + * Maximum size of virtio device config space + */ +#define VHOST_USER_MAX_CONFIG_SIZE 256 + enum VhostUserProtocolFeature { VHOST_USER_PROTOCOL_F_MQ = 0, VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, @@ -65,12 +70,15 @@ typedef enum VhostUserRequest { VHOST_USER_SET_SLAVE_REQ_FD = 21, VHOST_USER_IOTLB_MSG = 22, VHOST_USER_SET_VRING_ENDIAN = 23, + VHOST_USER_GET_CONFIG = 24, + VHOST_USER_SET_CONFIG = 25, VHOST_USER_MAX } VhostUserRequest; typedef enum VhostUserSlaveRequest { VHOST_USER_SLAVE_NONE = 0, VHOST_USER_SLAVE_IOTLB_MSG = 1, + VHOST_USER_SLAVE_CONFIG_CHANGE_MSG = 2, VHOST_USER_SLAVE_MAX } VhostUserSlaveRequest; @@ -92,7 +100,19 @@ typedef struct VhostUserLog { uint64_t mmap_offset; } VhostUserLog; -typedef struct VhostUserMsg { +typedef struct VhostUserConfig { + uint32_t offset; + uint32_t size; + uint32_t flags; + uint8_t region[VHOST_USER_MAX_CONFIG_SIZE]; +} VhostUserConfig; + +static VhostUserConfig c __attribute__ ((unused)); +#define VHOST_USER_CONFIG_HDR_SIZE (sizeof(c.offset) \ + + sizeof(c.size) \ + + sizeof(c.flags)) + +typedef struct { VhostUserRequest request; #define VHOST_USER_VERSION_MASK (0x3) @@ -100,7 +120,9 @@ typedef struct VhostUserMsg { #define VHOST_USER_NEED_REPLY_MASK (0x1 << 3) uint32_t flags; uint32_t size; /* the following payload size */ - union { +} QEMU_PACKED VhostUserHeader; + +typedef union { #define VHOST_USER_VRING_IDX_MASK (0xff) #define VHOST_USER_VRING_NOFD_MASK (0x1<<8) uint64_t u64; @@ -109,15 +131,18 @@ typedef struct VhostUserMsg { VhostUserMemory memory; VhostUserLog log; struct vhost_iotlb_msg iotlb; - } payload; + VhostUserConfig config; +} VhostUserPayload; + +typedef struct VhostUserMsg { + VhostUserHeader hdr; + VhostUserPayload payload; } QEMU_PACKED VhostUserMsg; static VhostUserMsg m __attribute__ ((unused)); -#define VHOST_USER_HDR_SIZE (sizeof(m.request) \ - + sizeof(m.flags) \ - + sizeof(m.size)) +#define VHOST_USER_HDR_SIZE (sizeof(VhostUserHeader)) -#define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE) +#define VHOST_USER_PAYLOAD_SIZE (sizeof(VhostUserPayload)) /* The version of the protocol we support */ #define VHOST_USER_VERSION (0x1) @@ -142,33 +167,33 @@ static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) r = qemu_chr_fe_read_all(chr, p, size); if (r != size) { error_report("Failed to read msg header. Read %d instead of %d." - " Original request %d.", r, size, msg->request); + " Original request %d.", r, size, msg->hdr.request); goto fail; } /* validate received flags */ - if (msg->flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) { + if (msg->hdr.flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) { error_report("Failed to read msg header." - " Flags 0x%x instead of 0x%x.", msg->flags, + " Flags 0x%x instead of 0x%x.", msg->hdr.flags, VHOST_USER_REPLY_MASK | VHOST_USER_VERSION); goto fail; } /* validate message size is sane */ - if (msg->size > VHOST_USER_PAYLOAD_SIZE) { + if (msg->hdr.size > VHOST_USER_PAYLOAD_SIZE) { error_report("Failed to read msg header." - " Size %d exceeds the maximum %zu.", msg->size, + " Size %d exceeds the maximum %zu.", msg->hdr.size, VHOST_USER_PAYLOAD_SIZE); goto fail; } - if (msg->size) { + if (msg->hdr.size) { p += VHOST_USER_HDR_SIZE; - size = msg->size; + size = msg->hdr.size; r = qemu_chr_fe_read_all(chr, p, size); if (r != size) { error_report("Failed to read msg payload." - " Read %d instead of %d.", r, msg->size); + " Read %d instead of %d.", r, msg->hdr.size); goto fail; } } @@ -184,7 +209,7 @@ static int process_message_reply(struct vhost_dev *dev, { VhostUserMsg msg_reply; - if ((msg->flags & VHOST_USER_NEED_REPLY_MASK) == 0) { + if ((msg->hdr.flags & VHOST_USER_NEED_REPLY_MASK) == 0) { return 0; } @@ -192,10 +217,10 @@ static int process_message_reply(struct vhost_dev *dev, return -1; } - if (msg_reply.request != msg->request) { + if (msg_reply.hdr.request != msg->hdr.request) { error_report("Received unexpected msg type." "Expected %d received %d", - msg->request, msg_reply.request); + msg->hdr.request, msg_reply.hdr.request); return -1; } @@ -222,15 +247,15 @@ static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, { struct vhost_user *u = dev->opaque; CharBackend *chr = u->chr; - int ret, size = VHOST_USER_HDR_SIZE + msg->size; + int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size; /* * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE, * we just need send it once in the first time. For later such * request, we just ignore it. */ - if (vhost_user_one_time_request(msg->request) && dev->vq_index != 0) { - msg->flags &= ~VHOST_USER_NEED_REPLY_MASK; + if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) { + msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; return 0; } @@ -257,11 +282,11 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, bool shmfd = virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_LOG_SHMFD); VhostUserMsg msg = { - .request = VHOST_USER_SET_LOG_BASE, - .flags = VHOST_USER_VERSION, + .hdr.request = VHOST_USER_SET_LOG_BASE, + .hdr.flags = VHOST_USER_VERSION, .payload.log.mmap_size = log->size * sizeof(*(log->log)), .payload.log.mmap_offset = 0, - .size = sizeof(msg.payload.log), + .hdr.size = sizeof(msg.payload.log), }; if (shmfd && log->fd != -1) { @@ -273,15 +298,15 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, } if (shmfd) { - msg.size = 0; + msg.hdr.size = 0; if (vhost_user_read(dev, &msg) < 0) { return -1; } - if (msg.request != VHOST_USER_SET_LOG_BASE) { + if (msg.hdr.request != VHOST_USER_SET_LOG_BASE) { error_report("Received unexpected msg type. " "Expected %d received %d", - VHOST_USER_SET_LOG_BASE, msg.request); + VHOST_USER_SET_LOG_BASE, msg.hdr.request); return -1; } } @@ -299,12 +324,12 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev, VHOST_USER_PROTOCOL_F_REPLY_ACK); VhostUserMsg msg = { - .request = VHOST_USER_SET_MEM_TABLE, - .flags = VHOST_USER_VERSION, + .hdr.request = VHOST_USER_SET_MEM_TABLE, + .hdr.flags = VHOST_USER_VERSION, }; if (reply_supported) { - msg.flags |= VHOST_USER_NEED_REPLY_MASK; + msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; } for (i = 0; i < dev->mem->nregions; ++i) { @@ -317,11 +342,14 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev, &offset); fd = memory_region_get_fd(mr); if (fd > 0) { + if (fd_num == VHOST_MEMORY_MAX_NREGIONS) { + error_report("Failed preparing vhost-user memory table msg"); + return -1; + } msg.payload.memory.regions[fd_num].userspace_addr = reg->userspace_addr; msg.payload.memory.regions[fd_num].memory_size = reg->memory_size; msg.payload.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr; msg.payload.memory.regions[fd_num].mmap_offset = offset; - assert(fd_num < VHOST_MEMORY_MAX_NREGIONS); fds[fd_num++] = fd; } } @@ -334,9 +362,9 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev, return -1; } - msg.size = sizeof(msg.payload.memory.nregions); - msg.size += sizeof(msg.payload.memory.padding); - msg.size += fd_num * sizeof(VhostUserMemoryRegion); + msg.hdr.size = sizeof(msg.payload.memory.nregions); + msg.hdr.size += sizeof(msg.payload.memory.padding); + msg.hdr.size += fd_num * sizeof(VhostUserMemoryRegion); if (vhost_user_write(dev, &msg, fds, fd_num) < 0) { return -1; @@ -353,10 +381,10 @@ static int vhost_user_set_vring_addr(struct vhost_dev *dev, struct vhost_vring_addr *addr) { VhostUserMsg msg = { - .request = VHOST_USER_SET_VRING_ADDR, - .flags = VHOST_USER_VERSION, + .hdr.request = VHOST_USER_SET_VRING_ADDR, + .hdr.flags = VHOST_USER_VERSION, .payload.addr = *addr, - .size = sizeof(msg.payload.addr), + .hdr.size = sizeof(msg.payload.addr), }; if (vhost_user_write(dev, &msg, NULL, 0) < 0) { @@ -372,10 +400,10 @@ static int vhost_user_set_vring_endian(struct vhost_dev *dev, bool cross_endian = virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_CROSS_ENDIAN); VhostUserMsg msg = { - .request = VHOST_USER_SET_VRING_ENDIAN, - .flags = VHOST_USER_VERSION, + .hdr.request = VHOST_USER_SET_VRING_ENDIAN, + .hdr.flags = VHOST_USER_VERSION, .payload.state = *ring, - .size = sizeof(msg.payload.state), + .hdr.size = sizeof(msg.payload.state), }; if (!cross_endian) { @@ -395,10 +423,10 @@ static int vhost_set_vring(struct vhost_dev *dev, struct vhost_vring_state *ring) { VhostUserMsg msg = { - .request = request, - .flags = VHOST_USER_VERSION, + .hdr.request = request, + .hdr.flags = VHOST_USER_VERSION, .payload.state = *ring, - .size = sizeof(msg.payload.state), + .hdr.size = sizeof(msg.payload.state), }; if (vhost_user_write(dev, &msg, NULL, 0) < 0) { @@ -444,10 +472,10 @@ static int vhost_user_get_vring_base(struct vhost_dev *dev, struct vhost_vring_state *ring) { VhostUserMsg msg = { - .request = VHOST_USER_GET_VRING_BASE, - .flags = VHOST_USER_VERSION, + .hdr.request = VHOST_USER_GET_VRING_BASE, + .hdr.flags = VHOST_USER_VERSION, .payload.state = *ring, - .size = sizeof(msg.payload.state), + .hdr.size = sizeof(msg.payload.state), }; if (vhost_user_write(dev, &msg, NULL, 0) < 0) { @@ -458,13 +486,13 @@ static int vhost_user_get_vring_base(struct vhost_dev *dev, return -1; } - if (msg.request != VHOST_USER_GET_VRING_BASE) { + if (msg.hdr.request != VHOST_USER_GET_VRING_BASE) { error_report("Received unexpected msg type. Expected %d received %d", - VHOST_USER_GET_VRING_BASE, msg.request); + VHOST_USER_GET_VRING_BASE, msg.hdr.request); return -1; } - if (msg.size != sizeof(msg.payload.state)) { + if (msg.hdr.size != sizeof(msg.payload.state)) { error_report("Received bad msg size."); return -1; } @@ -481,10 +509,10 @@ static int vhost_set_vring_file(struct vhost_dev *dev, int fds[VHOST_MEMORY_MAX_NREGIONS]; size_t fd_num = 0; VhostUserMsg msg = { - .request = request, - .flags = VHOST_USER_VERSION, + .hdr.request = request, + .hdr.flags = VHOST_USER_VERSION, .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK, - .size = sizeof(msg.payload.u64), + .hdr.size = sizeof(msg.payload.u64), }; if (ioeventfd_enabled() && file->fd > 0) { @@ -515,10 +543,10 @@ static int vhost_user_set_vring_call(struct vhost_dev *dev, static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64) { VhostUserMsg msg = { - .request = request, - .flags = VHOST_USER_VERSION, + .hdr.request = request, + .hdr.flags = VHOST_USER_VERSION, .payload.u64 = u64, - .size = sizeof(msg.payload.u64), + .hdr.size = sizeof(msg.payload.u64), }; if (vhost_user_write(dev, &msg, NULL, 0) < 0) { @@ -543,8 +571,8 @@ static int vhost_user_set_protocol_features(struct vhost_dev *dev, static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) { VhostUserMsg msg = { - .request = request, - .flags = VHOST_USER_VERSION, + .hdr.request = request, + .hdr.flags = VHOST_USER_VERSION, }; if (vhost_user_one_time_request(request) && dev->vq_index != 0) { @@ -559,13 +587,13 @@ static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) return -1; } - if (msg.request != request) { + if (msg.hdr.request != request) { error_report("Received unexpected msg type. Expected %d received %d", - request, msg.request); + request, msg.hdr.request); return -1; } - if (msg.size != sizeof(msg.payload.u64)) { + if (msg.hdr.size != sizeof(msg.payload.u64)) { error_report("Received bad msg size."); return -1; } @@ -583,8 +611,8 @@ static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features) static int vhost_user_set_owner(struct vhost_dev *dev) { VhostUserMsg msg = { - .request = VHOST_USER_SET_OWNER, - .flags = VHOST_USER_VERSION, + .hdr.request = VHOST_USER_SET_OWNER, + .hdr.flags = VHOST_USER_VERSION, }; if (vhost_user_write(dev, &msg, NULL, 0) < 0) { @@ -597,8 +625,8 @@ static int vhost_user_set_owner(struct vhost_dev *dev) static int vhost_user_reset_device(struct vhost_dev *dev) { VhostUserMsg msg = { - .request = VHOST_USER_RESET_OWNER, - .flags = VHOST_USER_VERSION, + .hdr.request = VHOST_USER_RESET_OWNER, + .hdr.flags = VHOST_USER_VERSION, }; if (vhost_user_write(dev, &msg, NULL, 0) < 0) { @@ -608,37 +636,56 @@ static int vhost_user_reset_device(struct vhost_dev *dev) return 0; } +static int vhost_user_slave_handle_config_change(struct vhost_dev *dev) +{ + int ret = -1; + + if (!dev->config_ops) { + return -1; + } + + if (dev->config_ops->vhost_dev_config_notifier) { + ret = dev->config_ops->vhost_dev_config_notifier(dev); + } + + return ret; +} + static void slave_read(void *opaque) { struct vhost_dev *dev = opaque; struct vhost_user *u = dev->opaque; - VhostUserMsg msg = { 0, }; + VhostUserHeader hdr = { 0, }; + VhostUserPayload payload = { 0, }; int size, ret = 0; /* Read header */ - size = read(u->slave_fd, &msg, VHOST_USER_HDR_SIZE); + size = read(u->slave_fd, &hdr, VHOST_USER_HDR_SIZE); if (size != VHOST_USER_HDR_SIZE) { error_report("Failed to read from slave."); goto err; } - if (msg.size > VHOST_USER_PAYLOAD_SIZE) { + if (hdr.size > VHOST_USER_PAYLOAD_SIZE) { error_report("Failed to read msg header." - " Size %d exceeds the maximum %zu.", msg.size, + " Size %d exceeds the maximum %zu.", hdr.size, VHOST_USER_PAYLOAD_SIZE); goto err; } /* Read payload */ - size = read(u->slave_fd, &msg.payload, msg.size); - if (size != msg.size) { + size = read(u->slave_fd, &payload, hdr.size); + if (size != hdr.size) { error_report("Failed to read payload from slave."); goto err; } - switch (msg.request) { + switch (hdr.request) { case VHOST_USER_SLAVE_IOTLB_MSG: - ret = vhost_backend_handle_iotlb_msg(dev, &msg.payload.iotlb); + ret = vhost_backend_handle_iotlb_msg(dev, &payload.iotlb); + break; + case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG : + ret = vhost_user_slave_handle_config_change(dev); break; default: error_report("Received unexpected msg type."); @@ -649,15 +696,23 @@ static void slave_read(void *opaque) * REPLY_ACK feature handling. Other reply types has to be managed * directly in their request handlers. */ - if (msg.flags & VHOST_USER_NEED_REPLY_MASK) { - msg.flags &= ~VHOST_USER_NEED_REPLY_MASK; - msg.flags |= VHOST_USER_REPLY_MASK; + if (hdr.flags & VHOST_USER_NEED_REPLY_MASK) { + struct iovec iovec[2]; + + + hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; + hdr.flags |= VHOST_USER_REPLY_MASK; + + payload.u64 = !!ret; + hdr.size = sizeof(payload.u64); - msg.payload.u64 = !!ret; - msg.size = sizeof(msg.payload.u64); + iovec[0].iov_base = &hdr; + iovec[0].iov_len = VHOST_USER_HDR_SIZE; + iovec[1].iov_base = &payload; + iovec[1].iov_len = hdr.size; - size = write(u->slave_fd, &msg, VHOST_USER_HDR_SIZE + msg.size); - if (size != VHOST_USER_HDR_SIZE + msg.size) { + size = writev(u->slave_fd, iovec, ARRAY_SIZE(iovec)); + if (size != VHOST_USER_HDR_SIZE + hdr.size) { error_report("Failed to send msg reply to slave."); goto err; } @@ -675,8 +730,8 @@ err: static int vhost_setup_slave_channel(struct vhost_dev *dev) { VhostUserMsg msg = { - .request = VHOST_USER_SET_SLAVE_REQ_FD, - .flags = VHOST_USER_VERSION, + .hdr.request = VHOST_USER_SET_SLAVE_REQ_FD, + .hdr.flags = VHOST_USER_VERSION, }; struct vhost_user *u = dev->opaque; int sv[2], ret = 0; @@ -697,7 +752,7 @@ static int vhost_setup_slave_channel(struct vhost_dev *dev) qemu_set_fd_handler(u->slave_fd, slave_read, NULL, dev); if (reply_supported) { - msg.flags |= VHOST_USER_NEED_REPLY_MASK; + msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; } ret = vhost_user_write(dev, &msg, &sv[1], 1); @@ -842,10 +897,10 @@ static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr) /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */ if (virtio_has_feature(dev->protocol_features, VHOST_USER_PROTOCOL_F_RARP)) { - msg.request = VHOST_USER_SEND_RARP; - msg.flags = VHOST_USER_VERSION; + msg.hdr.request = VHOST_USER_SEND_RARP; + msg.hdr.flags = VHOST_USER_VERSION; memcpy((char *)&msg.payload.u64, mac_addr, 6); - msg.size = sizeof(msg.payload.u64); + msg.hdr.size = sizeof(msg.payload.u64); return vhost_user_write(dev, &msg, NULL, 0); } @@ -879,12 +934,12 @@ static int vhost_user_net_set_mtu(struct vhost_dev *dev, uint16_t mtu) return 0; } - msg.request = VHOST_USER_NET_SET_MTU; + msg.hdr.request = VHOST_USER_NET_SET_MTU; msg.payload.u64 = mtu; - msg.size = sizeof(msg.payload.u64); - msg.flags = VHOST_USER_VERSION; + msg.hdr.size = sizeof(msg.payload.u64); + msg.hdr.flags = VHOST_USER_VERSION; if (reply_supported) { - msg.flags |= VHOST_USER_NEED_REPLY_MASK; + msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; } if (vhost_user_write(dev, &msg, NULL, 0) < 0) { @@ -903,9 +958,9 @@ static int vhost_user_send_device_iotlb_msg(struct vhost_dev *dev, struct vhost_iotlb_msg *imsg) { VhostUserMsg msg = { - .request = VHOST_USER_IOTLB_MSG, - .size = sizeof(msg.payload.iotlb), - .flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, + .hdr.request = VHOST_USER_IOTLB_MSG, + .hdr.size = sizeof(msg.payload.iotlb), + .hdr.flags = VHOST_USER_VERSION | VHOST_USER_NEED_REPLY_MASK, .payload.iotlb = *imsg, }; @@ -922,6 +977,83 @@ static void vhost_user_set_iotlb_callback(struct vhost_dev *dev, int enabled) /* No-op as the receive channel is not dedicated to IOTLB messages. */ } +static int vhost_user_get_config(struct vhost_dev *dev, uint8_t *config, + uint32_t config_len) +{ + VhostUserMsg msg = { + .hdr.request = VHOST_USER_GET_CONFIG, + .hdr.flags = VHOST_USER_VERSION, + .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + config_len, + }; + + if (config_len > VHOST_USER_MAX_CONFIG_SIZE) { + return -1; + } + + msg.payload.config.offset = 0; + msg.payload.config.size = config_len; + if (vhost_user_write(dev, &msg, NULL, 0) < 0) { + return -1; + } + + if (vhost_user_read(dev, &msg) < 0) { + return -1; + } + + if (msg.hdr.request != VHOST_USER_GET_CONFIG) { + error_report("Received unexpected msg type. Expected %d received %d", + VHOST_USER_GET_CONFIG, msg.hdr.request); + return -1; + } + + if (msg.hdr.size != VHOST_USER_CONFIG_HDR_SIZE + config_len) { + error_report("Received bad msg size."); + return -1; + } + + memcpy(config, msg.payload.config.region, config_len); + + return 0; +} + +static int vhost_user_set_config(struct vhost_dev *dev, const uint8_t *data, + uint32_t offset, uint32_t size, uint32_t flags) +{ + uint8_t *p; + bool reply_supported = virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_REPLY_ACK); + + VhostUserMsg msg = { + .hdr.request = VHOST_USER_SET_CONFIG, + .hdr.flags = VHOST_USER_VERSION, + .hdr.size = VHOST_USER_CONFIG_HDR_SIZE + size, + }; + + if (reply_supported) { + msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK; + } + + if (size > VHOST_USER_MAX_CONFIG_SIZE) { + return -1; + } + + msg.payload.config.offset = offset, + msg.payload.config.size = size, + msg.payload.config.flags = flags, + p = msg.payload.config.region; + memcpy(p, data, size); + + if (vhost_user_write(dev, &msg, NULL, 0) < 0) { + return -1; + } + + if (reply_supported) { + return process_message_reply(dev, &msg); + } + + return 0; +} + const VhostOps user_ops = { .backend_type = VHOST_BACKEND_TYPE_USER, .vhost_backend_init = vhost_user_init, @@ -948,4 +1080,6 @@ const VhostOps user_ops = { .vhost_net_set_mtu = vhost_user_net_set_mtu, .vhost_set_iotlb_callback = vhost_user_set_iotlb_callback, .vhost_send_device_iotlb_msg = vhost_user_send_device_iotlb_msg, + .vhost_get_config = vhost_user_get_config, + .vhost_set_config = vhost_user_set_config, }; diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index e4290ce..386aef8 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -1505,6 +1505,38 @@ void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits, } } +int vhost_dev_get_config(struct vhost_dev *hdev, uint8_t *config, + uint32_t config_len) +{ + assert(hdev->vhost_ops); + + if (hdev->vhost_ops->vhost_get_config) { + return hdev->vhost_ops->vhost_get_config(hdev, config, config_len); + } + + return -1; +} + +int vhost_dev_set_config(struct vhost_dev *hdev, const uint8_t *data, + uint32_t offset, uint32_t size, uint32_t flags) +{ + assert(hdev->vhost_ops); + + if (hdev->vhost_ops->vhost_set_config) { + return hdev->vhost_ops->vhost_set_config(hdev, data, offset, + size, flags); + } + + return -1; +} + +void vhost_dev_set_config_notifier(struct vhost_dev *hdev, + const VhostDevConfigOps *ops) +{ + assert(hdev->vhost_ops); + hdev->config_ops = ops; +} + /* Host notifiers must be enabled at this point. */ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev) { diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c index 3042232..8106346 100644 --- a/hw/virtio/virtio-bus.c +++ b/hw/virtio/virtio-bus.c @@ -256,6 +256,15 @@ bool virtio_bus_ioeventfd_enabled(VirtioBusState *bus) return k->ioeventfd_assign && k->ioeventfd_enabled(proxy); } +static void virtio_bus_cleanup_event_notifier(EventNotifier *notifier) +{ + /* Test and clear notifier after disabling event, + * in case poll callback didn't have time to run. + */ + virtio_queue_host_notifier_read(notifier); + event_notifier_cleanup(notifier); +} + /* * This function switches ioeventfd on/off in the device. * The caller must set or clear the handlers for the EventNotifier. @@ -283,19 +292,13 @@ int virtio_bus_set_host_notifier(VirtioBusState *bus, int n, bool assign) r = k->ioeventfd_assign(proxy, notifier, n, true); if (r < 0) { error_report("%s: unable to assign ioeventfd: %d", __func__, r); - goto cleanup_event_notifier; + virtio_bus_cleanup_event_notifier(notifier); } - return 0; } else { + notifier->cleanup = virtio_bus_cleanup_event_notifier; k->ioeventfd_assign(proxy, notifier, n, false); } -cleanup_event_notifier: - /* Test and clear notifier after disabling event, - * in case poll callback didn't have time to run. - */ - virtio_queue_host_notifier_read(notifier); - event_notifier_cleanup(notifier); return r; } diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 6c75cca..9ae10f0 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1978,6 +1978,58 @@ static const TypeInfo virtio_blk_pci_info = { .class_init = virtio_blk_pci_class_init, }; +#if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX) +/* vhost-user-blk */ + +static Property vhost_user_blk_pci_properties[] = { + DEFINE_PROP_UINT32("class", VirtIOPCIProxy, class_code, 0), + DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2), + DEFINE_PROP_END_OF_LIST(), +}; + +static void vhost_user_blk_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +{ + VHostUserBlkPCI *dev = VHOST_USER_BLK_PCI(vpci_dev); + DeviceState *vdev = DEVICE(&dev->vdev); + + qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus)); + object_property_set_bool(OBJECT(vdev), true, "realized", errp); +} + +static void vhost_user_blk_pci_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass); + + set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); + dc->props = vhost_user_blk_pci_properties; + k->realize = vhost_user_blk_pci_realize; + pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET; + pcidev_k->device_id = PCI_DEVICE_ID_VIRTIO_BLOCK; + pcidev_k->revision = VIRTIO_PCI_ABI_VERSION; + pcidev_k->class_id = PCI_CLASS_STORAGE_SCSI; +} + +static void vhost_user_blk_pci_instance_init(Object *obj) +{ + VHostUserBlkPCI *dev = VHOST_USER_BLK_PCI(obj); + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VHOST_USER_BLK); + object_property_add_alias(obj, "bootindex", OBJECT(&dev->vdev), + "bootindex", &error_abort); +} + +static const TypeInfo vhost_user_blk_pci_info = { + .name = TYPE_VHOST_USER_BLK_PCI, + .parent = TYPE_VIRTIO_PCI, + .instance_size = sizeof(VHostUserBlkPCI), + .instance_init = vhost_user_blk_pci_instance_init, + .class_init = vhost_user_blk_pci_class_init, +}; +#endif + /* virtio-scsi-pci */ static Property virtio_scsi_pci_properties[] = { @@ -2624,6 +2676,9 @@ static void virtio_pci_register_types(void) type_register_static(&virtio_9p_pci_info); #endif type_register_static(&virtio_blk_pci_info); +#if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX) + type_register_static(&vhost_user_blk_pci_info); +#endif type_register_static(&virtio_scsi_pci_info); type_register_static(&virtio_balloon_pci_info); type_register_static(&virtio_serial_pci_info); diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h index 12d3a90..813082b 100644 --- a/hw/virtio/virtio-pci.h +++ b/hw/virtio/virtio-pci.h @@ -27,6 +27,9 @@ #include "hw/virtio/virtio-gpu.h" #include "hw/virtio/virtio-crypto.h" #include "hw/virtio/vhost-user-scsi.h" +#if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX) +#include "hw/virtio/vhost-user-blk.h" +#endif #ifdef CONFIG_VIRTFS #include "hw/9pfs/virtio-9p.h" @@ -46,6 +49,7 @@ typedef struct VirtIOSerialPCI VirtIOSerialPCI; typedef struct VirtIONetPCI VirtIONetPCI; typedef struct VHostSCSIPCI VHostSCSIPCI; typedef struct VHostUserSCSIPCI VHostUserSCSIPCI; +typedef struct VHostUserBlkPCI VHostUserBlkPCI; typedef struct VirtIORngPCI VirtIORngPCI; typedef struct VirtIOInputPCI VirtIOInputPCI; typedef struct VirtIOInputHIDPCI VirtIOInputHIDPCI; @@ -244,6 +248,20 @@ struct VHostUserSCSIPCI { VHostUserSCSI vdev; }; +#if defined(CONFIG_VHOST_USER) && defined(CONFIG_LINUX) +/* + * vhost-user-blk-pci: This extends VirtioPCIProxy. + */ +#define TYPE_VHOST_USER_BLK_PCI "vhost-user-blk-pci" +#define VHOST_USER_BLK_PCI(obj) \ + OBJECT_CHECK(VHostUserBlkPCI, (obj), TYPE_VHOST_USER_BLK_PCI) + +struct VHostUserBlkPCI { + VirtIOPCIProxy parent_obj; + VHostUserBlk vdev; +}; +#endif + /* * virtio-blk-pci: This extends VirtioPCIProxy. */ diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index d6002ee..3ac3491 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -2574,6 +2574,7 @@ static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev) VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev))); int n, r, err; + memory_region_transaction_begin(); for (n = 0; n < VIRTIO_QUEUE_MAX; n++) { VirtQueue *vq = &vdev->vq[n]; if (!virtio_queue_get_num(vdev, n)) { @@ -2596,6 +2597,7 @@ static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev) } event_notifier_set(&vq->host_notifier); } + memory_region_transaction_commit(); return 0; assign_error: @@ -2609,6 +2611,7 @@ assign_error: r = virtio_bus_set_host_notifier(qbus, n, false); assert(r >= 0); } + memory_region_transaction_commit(); return err; } @@ -2625,6 +2628,7 @@ static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev) VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev))); int n, r; + memory_region_transaction_begin(); for (n = 0; n < VIRTIO_QUEUE_MAX; n++) { VirtQueue *vq = &vdev->vq[n]; @@ -2635,6 +2639,7 @@ static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev) r = virtio_bus_set_host_notifier(qbus, n, false); assert(r >= 0); } + memory_region_transaction_commit(); } void virtio_device_stop_ioeventfd(VirtIODevice *vdev) diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h index ac15e6b..45ec891 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -46,8 +46,10 @@ #define VTD_SID_TO_DEVFN(sid) ((sid) & 0xff) #define DMAR_REG_SIZE 0x230 -#define VTD_HOST_ADDRESS_WIDTH 39 -#define VTD_HAW_MASK ((1ULL << VTD_HOST_ADDRESS_WIDTH) - 1) +#define VTD_HOST_AW_39BIT 39 +#define VTD_HOST_AW_48BIT 48 +#define VTD_HOST_ADDRESS_WIDTH VTD_HOST_AW_39BIT +#define VTD_HAW_MASK(aw) ((1ULL << (aw)) - 1) #define DMAR_REPORT_F_INTR (1) @@ -302,6 +304,7 @@ struct IntelIOMMUState { bool intr_eime; /* Extended interrupt mode enabled */ OnOffAuto intr_eim; /* Toggle for EIM cabability */ bool buggy_eim; /* Force buggy EIM unless eim=off */ + uint8_t aw_bits; /* Host/IOVA address width (in bits) */ }; /* Find the VTD Address space associated with the given bus pointer, diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h index a7a5f22..592254f 100644 --- a/include/hw/virtio/vhost-backend.h +++ b/include/hw/virtio/vhost-backend.h @@ -20,6 +20,11 @@ typedef enum VhostBackendType { VHOST_BACKEND_TYPE_MAX = 3, } VhostBackendType; +typedef enum VhostSetConfigType { + VHOST_SET_CONFIG_TYPE_MASTER = 0, + VHOST_SET_CONFIG_TYPE_MIGRATION = 1, +} VhostSetConfigType; + struct vhost_dev; struct vhost_log; struct vhost_memory; @@ -84,6 +89,11 @@ typedef void (*vhost_set_iotlb_callback_op)(struct vhost_dev *dev, int enabled); typedef int (*vhost_send_device_iotlb_msg_op)(struct vhost_dev *dev, struct vhost_iotlb_msg *imsg); +typedef int (*vhost_set_config_op)(struct vhost_dev *dev, const uint8_t *data, + uint32_t offset, uint32_t size, + uint32_t flags); +typedef int (*vhost_get_config_op)(struct vhost_dev *dev, uint8_t *config, + uint32_t config_len); typedef struct VhostOps { VhostBackendType backend_type; @@ -118,6 +128,8 @@ typedef struct VhostOps { vhost_vsock_set_running_op vhost_vsock_set_running; vhost_set_iotlb_callback_op vhost_set_iotlb_callback; vhost_send_device_iotlb_msg_op vhost_send_device_iotlb_msg; + vhost_get_config_op vhost_get_config; + vhost_set_config_op vhost_set_config; } VhostOps; extern const VhostOps user_ops; diff --git a/include/hw/virtio/vhost-user-blk.h b/include/hw/virtio/vhost-user-blk.h new file mode 100644 index 0000000..5804cc9 --- /dev/null +++ b/include/hw/virtio/vhost-user-blk.h @@ -0,0 +1,41 @@ +/* + * vhost-user-blk host device + * Copyright(C) 2017 Intel Corporation. + * + * Authors: + * Changpeng Liu <changpeng.liu@intel.com> + * + * Based on vhost-scsi.h, Copyright IBM, Corp. 2011 + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#ifndef VHOST_USER_BLK_H +#define VHOST_USER_BLK_H + +#include "standard-headers/linux/virtio_blk.h" +#include "qemu-common.h" +#include "hw/qdev.h" +#include "hw/block/block.h" +#include "chardev/char-fe.h" +#include "hw/virtio/vhost.h" + +#define TYPE_VHOST_USER_BLK "vhost-user-blk" +#define VHOST_USER_BLK(obj) \ + OBJECT_CHECK(VHostUserBlk, (obj), TYPE_VHOST_USER_BLK) + +typedef struct VHostUserBlk { + VirtIODevice parent_obj; + CharBackend chardev; + int32_t bootindex; + struct virtio_blk_config blkcfg; + uint16_t num_queues; + uint32_t queue_size; + uint32_t config_wce; + uint32_t config_ro; + struct vhost_dev dev; +} VHostUserBlk; + +#endif diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index 467dc77..1dc2d73 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -46,6 +46,12 @@ struct vhost_iommu { QLIST_ENTRY(vhost_iommu) iommu_next; }; +typedef struct VhostDevConfigOps { + /* Vhost device config space changed callback + */ + int (*vhost_dev_config_notifier)(struct vhost_dev *dev); +} VhostDevConfigOps; + struct vhost_memory; struct vhost_dev { VirtIODevice *vdev; @@ -76,6 +82,7 @@ struct vhost_dev { QLIST_ENTRY(vhost_dev) entry; QLIST_HEAD(, vhost_iommu) iommu_list; IOMMUNotifier n; + const VhostDevConfigOps *config_ops; }; int vhost_dev_init(struct vhost_dev *hdev, void *opaque, @@ -106,4 +113,12 @@ int vhost_net_set_backend(struct vhost_dev *hdev, struct vhost_vring_file *file); int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write); +int vhost_dev_get_config(struct vhost_dev *dev, uint8_t *config, + uint32_t config_len); +int vhost_dev_set_config(struct vhost_dev *dev, const uint8_t *data, + uint32_t offset, uint32_t size, uint32_t flags); +/* notifier callback in case vhost device config space changed + */ +void vhost_dev_set_config_notifier(struct vhost_dev *dev, + const VhostDevConfigOps *ops); #endif diff --git a/include/qemu/event_notifier.h b/include/qemu/event_notifier.h index 599c99f..b30a454 100644 --- a/include/qemu/event_notifier.h +++ b/include/qemu/event_notifier.h @@ -26,6 +26,7 @@ struct EventNotifier { int rfd; int wfd; #endif + void (*cleanup)(EventNotifier *); }; typedef void EventNotifierHandler(EventNotifier *); diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h index 5ac621c..38da849 100644 --- a/include/qemu/host-utils.h +++ b/include/qemu/host-utils.h @@ -400,6 +400,16 @@ static inline uint64_t pow2ceil(uint64_t value) return 0x8000000000000000ull >> (n - 1); } +static inline uint32_t pow2roundup32(uint32_t x) +{ + x |= (x >> 1); + x |= (x >> 2); + x |= (x >> 4); + x |= (x >> 8); + x |= (x >> 16); + return x + 1; +} + /** * urshift - 128-bit Unsigned Right Shift. * @plow: in/out - lower 64-bit integer. diff --git a/tests/acpi-test-data/pc/DSDT.numamem b/tests/acpi-test-data/pc/DSDT.numamem Binary files differnew file mode 100644 index 0000000..224cfdd --- /dev/null +++ b/tests/acpi-test-data/pc/DSDT.numamem diff --git a/tests/acpi-test-data/pc/SRAT.numamem b/tests/acpi-test-data/pc/SRAT.numamem Binary files differnew file mode 100644 index 0000000..dbc595d --- /dev/null +++ b/tests/acpi-test-data/pc/SRAT.numamem diff --git a/tests/acpi-test-data/q35/DSDT.numamem b/tests/acpi-test-data/q35/DSDT.numamem Binary files differnew file mode 100644 index 0000000..8c9fa44 --- /dev/null +++ b/tests/acpi-test-data/q35/DSDT.numamem diff --git a/tests/acpi-test-data/q35/SRAT.numamem b/tests/acpi-test-data/q35/SRAT.numamem Binary files differnew file mode 100644 index 0000000..dbc595d --- /dev/null +++ b/tests/acpi-test-data/q35/SRAT.numamem diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c index e28e0c9..b354aaa 100644 --- a/tests/bios-tables-test.c +++ b/tests/bios-tables-test.c @@ -210,10 +210,15 @@ static void test_acpi_facs_table(test_data *data) ACPI_ASSERT_CMP(facs_table->signature, "FACS"); } -static void test_dst_table(AcpiSdtTable *sdt_table, uint32_t addr) +/** fetch_table + * load ACPI table at @addr into table descriptor @sdt_table + * and check that header checksum matches actual one. + */ +static void fetch_table(AcpiSdtTable *sdt_table, uint32_t addr) { uint8_t checksum; + memset(sdt_table, 0, sizeof(*sdt_table)); ACPI_READ_TABLE_HEADER(&sdt_table->header, addr); sdt_table->aml_len = le32_to_cpu(sdt_table->header.length) @@ -233,17 +238,15 @@ static void test_acpi_dsdt_table(test_data *data) AcpiSdtTable dsdt_table; uint32_t addr = le32_to_cpu(data->fadt_table.dsdt); - memset(&dsdt_table, 0, sizeof(dsdt_table)); - data->tables = g_array_new(false, true, sizeof(AcpiSdtTable)); - - test_dst_table(&dsdt_table, addr); + fetch_table(&dsdt_table, addr); ACPI_ASSERT_CMP(dsdt_table.header.signature, "DSDT"); - /* Place DSDT first */ + /* Since DSDT isn't in RSDT, add DSDT to ASL test tables list manually */ g_array_append_val(data->tables, dsdt_table); } -static void test_acpi_tables(test_data *data) +/* Load all tables and add to test list directly RSDT referenced tables */ +static void fetch_rsdt_referenced_tables(test_data *data) { int tables_nr = data->rsdt_tables_nr - 1; /* fadt is first */ int i; @@ -252,9 +255,10 @@ static void test_acpi_tables(test_data *data) AcpiSdtTable ssdt_table; uint32_t addr; - memset(&ssdt_table, 0, sizeof(ssdt_table)); addr = le32_to_cpu(data->rsdt_tables_addr[i + 1]); /* fadt is first */ - test_dst_table(&ssdt_table, addr); + fetch_table(&ssdt_table, addr); + + /* Add table to ASL test tables list */ g_array_append_val(data->tables, ssdt_table); } } @@ -425,6 +429,7 @@ try_again: return exp_tables; } +/* test the list of tables in @data->tables against reference tables */ static void test_acpi_asl(test_data *data) { int i; @@ -636,13 +641,14 @@ static void test_acpi_one(const char *params, test_data *data) boot_sector_test(); + data->tables = g_array_new(false, true, sizeof(AcpiSdtTable)); test_acpi_rsdp_address(data); test_acpi_rsdp_table(data); test_acpi_rsdt_table(data); test_acpi_fadt_table(data); test_acpi_facs_table(data); test_acpi_dsdt_table(data); - test_acpi_tables(data); + fetch_rsdt_referenced_tables(data); if (iasl) { if (getenv(ACPI_REBUILD_EXPECTED_AML)) { @@ -810,6 +816,28 @@ static void test_acpi_piix4_tcg_memhp(void) free_test_data(&data); } +static void test_acpi_q35_tcg_numamem(void) +{ + test_data data; + + memset(&data, 0, sizeof(data)); + data.machine = MACHINE_Q35; + data.variant = ".numamem"; + test_acpi_one(" -numa node -numa node,mem=128", &data); + free_test_data(&data); +} + +static void test_acpi_piix4_tcg_numamem(void) +{ + test_data data; + + memset(&data, 0, sizeof(data)); + data.machine = MACHINE_PC; + data.variant = ".numamem"; + test_acpi_one(" -numa node -numa node,mem=128", &data); + free_test_data(&data); +} + int main(int argc, char *argv[]) { const char *arch = qtest_get_arch(); @@ -832,6 +860,8 @@ int main(int argc, char *argv[]) qtest_add_func("acpi/q35/cpuhp", test_acpi_q35_tcg_cphp); qtest_add_func("acpi/piix4/memhp", test_acpi_piix4_tcg_memhp); qtest_add_func("acpi/q35/memhp", test_acpi_q35_tcg_memhp); + qtest_add_func("acpi/piix4/numamem", test_acpi_piix4_tcg_numamem); + qtest_add_func("acpi/q35/numamem", test_acpi_q35_tcg_numamem); } ret = g_test_run(); boot_sector_cleanup(disk); diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c index e2c89ed..ec6ac9d 100644 --- a/tests/vhost-user-test.c +++ b/tests/vhost-user-test.c @@ -55,6 +55,7 @@ /*********** FROM hw/virtio/vhost-user.c *************************************/ #define VHOST_MEMORY_MAX_NREGIONS 8 +#define VHOST_MAX_VIRTQUEUES 0x100 #define VHOST_USER_F_PROTOCOL_FEATURES 30 #define VHOST_USER_PROTOCOL_F_MQ 0 @@ -141,6 +142,8 @@ enum { typedef struct TestServer { QPCIBus *bus; + QVirtioPCIDevice *dev; + QVirtQueue *vq[VHOST_MAX_VIRTQUEUES]; gchar *socket_path; gchar *mig_path; gchar *chr_name; @@ -155,33 +158,51 @@ typedef struct TestServer { bool test_fail; int test_flags; int queues; + QGuestAllocator *alloc; } TestServer; static const char *tmpfs; static const char *root; -static void init_virtio_dev(TestServer *s) +static void init_virtio_dev(TestServer *s, uint32_t features_mask) { - QVirtioPCIDevice *dev; uint32_t features; + int i; s->bus = qpci_init_pc(NULL); g_assert_nonnull(s->bus); - dev = qvirtio_pci_device_find(s->bus, VIRTIO_ID_NET); - g_assert_nonnull(dev); + s->dev = qvirtio_pci_device_find(s->bus, VIRTIO_ID_NET); + g_assert_nonnull(s->dev); + + qvirtio_pci_device_enable(s->dev); + qvirtio_reset(&s->dev->vdev); + qvirtio_set_acknowledge(&s->dev->vdev); + qvirtio_set_driver(&s->dev->vdev); - qvirtio_pci_device_enable(dev); - qvirtio_reset(&dev->vdev); - qvirtio_set_acknowledge(&dev->vdev); - qvirtio_set_driver(&dev->vdev); + s->alloc = pc_alloc_init(); - features = qvirtio_get_features(&dev->vdev); - features = features & VIRTIO_NET_F_MAC; - qvirtio_set_features(&dev->vdev, features); + for (i = 0; i < s->queues * 2; i++) { + s->vq[i] = qvirtqueue_setup(&s->dev->vdev, s->alloc, i); + } + + features = qvirtio_get_features(&s->dev->vdev); + features = features & features_mask; + qvirtio_set_features(&s->dev->vdev, features); - qvirtio_set_driver_ok(&dev->vdev); - qvirtio_pci_device_free(dev); + qvirtio_set_driver_ok(&s->dev->vdev); +} + +static void uninit_virtio_dev(TestServer *s) +{ + int i; + + for (i = 0; i < s->queues * 2; i++) { + qvirtqueue_cleanup(s->dev->vdev.bus, s->vq[i], s->alloc); + } + pc_alloc_uninit(s->alloc); + + qvirtio_pci_device_free(s->dev); } static void wait_for_fds(TestServer *s) @@ -617,6 +638,30 @@ GSourceFuncs test_migrate_source_funcs = { .check = test_migrate_source_check, }; +static void test_read_guest_mem(void) +{ + TestServer *server = NULL; + char *qemu_cmd = NULL; + QTestState *s = NULL; + + server = test_server_new("test"); + test_server_listen(server); + + qemu_cmd = GET_QEMU_CMD(server); + + s = qtest_start(qemu_cmd); + g_free(qemu_cmd); + + init_virtio_dev(server, 1u << VIRTIO_NET_F_MAC); + + read_guest_mem(server); + + uninit_virtio_dev(server); + + qtest_quit(s); + test_server_free(server); +} + static void test_migrate(void) { TestServer *s = test_server_new("src"); @@ -636,7 +681,7 @@ static void test_migrate(void) from = qtest_start(cmd); g_free(cmd); - init_virtio_dev(s); + init_virtio_dev(s, 1u << VIRTIO_NET_F_MAC); wait_for_fds(s); size = get_log_size(s); g_assert_cmpint(size, ==, (2 * 1024 * 1024) / (VHOST_LOG_PAGE * 8)); @@ -689,6 +734,8 @@ static void test_migrate(void) read_guest_mem(dest); + uninit_virtio_dev(s); + g_source_destroy(source); g_source_unref(source); @@ -756,7 +803,7 @@ static void test_reconnect_subprocess(void) qtest_start(cmd); g_free(cmd); - init_virtio_dev(s); + init_virtio_dev(s, 1u << VIRTIO_NET_F_MAC); wait_for_fds(s); wait_for_rings_started(s, 2); @@ -767,6 +814,8 @@ static void test_reconnect_subprocess(void) wait_for_fds(s); wait_for_rings_started(s, 2); + uninit_virtio_dev(s); + qtest_end(); test_server_free(s); return; @@ -792,10 +841,12 @@ static void test_connect_fail_subprocess(void) qtest_start(cmd); g_free(cmd); - init_virtio_dev(s); + init_virtio_dev(s, 1u << VIRTIO_NET_F_MAC); wait_for_fds(s); wait_for_rings_started(s, 2); + uninit_virtio_dev(s); + qtest_end(); test_server_free(s); } @@ -820,10 +871,12 @@ static void test_flags_mismatch_subprocess(void) qtest_start(cmd); g_free(cmd); - init_virtio_dev(s); + init_virtio_dev(s, 1u << VIRTIO_NET_F_MAC); wait_for_fds(s); wait_for_rings_started(s, 2); + uninit_virtio_dev(s); + qtest_end(); test_server_free(s); } @@ -839,79 +892,30 @@ static void test_flags_mismatch(void) #endif -static QVirtioPCIDevice *virtio_net_pci_init(QPCIBus *bus, int slot) -{ - QVirtioPCIDevice *dev; - - dev = qvirtio_pci_device_find(bus, VIRTIO_ID_NET); - g_assert(dev != NULL); - g_assert_cmphex(dev->vdev.device_type, ==, VIRTIO_ID_NET); - - qvirtio_pci_device_enable(dev); - qvirtio_reset(&dev->vdev); - qvirtio_set_acknowledge(&dev->vdev); - qvirtio_set_driver(&dev->vdev); - - return dev; -} - -static void driver_init(QVirtioDevice *dev) -{ - uint32_t features; - - features = qvirtio_get_features(dev); - features = features & ~(QVIRTIO_F_BAD_FEATURE | - (1u << VIRTIO_RING_F_INDIRECT_DESC) | - (1u << VIRTIO_RING_F_EVENT_IDX)); - qvirtio_set_features(dev, features); - - qvirtio_set_driver_ok(dev); -} - -#define PCI_SLOT 0x04 - static void test_multiqueue(void) { - const int queues = 2; TestServer *s = test_server_new("mq"); - QVirtioPCIDevice *dev; - QPCIBus *bus; - QVirtQueuePCI *vq[queues * 2]; - QGuestAllocator *alloc; char *cmd; - int i; - - s->queues = queues; + uint32_t features_mask = ~(QVIRTIO_F_BAD_FEATURE | + (1u << VIRTIO_RING_F_INDIRECT_DESC) | + (1u << VIRTIO_RING_F_EVENT_IDX)); + s->queues = 2; test_server_listen(s); cmd = g_strdup_printf(QEMU_CMD_MEM QEMU_CMD_CHR QEMU_CMD_NETDEV ",queues=%d " "-device virtio-net-pci,netdev=net0,mq=on,vectors=%d", 512, 512, root, s->chr_name, s->socket_path, "", s->chr_name, - queues, queues * 2 + 2); + s->queues, s->queues * 2 + 2); qtest_start(cmd); g_free(cmd); - bus = qpci_init_pc(NULL); - dev = virtio_net_pci_init(bus, PCI_SLOT); + init_virtio_dev(s, features_mask); - alloc = pc_alloc_init(); - for (i = 0; i < queues * 2; i++) { - vq[i] = (QVirtQueuePCI *)qvirtqueue_setup(&dev->vdev, alloc, i); - } + wait_for_rings_started(s, s->queues * 2); - driver_init(&dev->vdev); - wait_for_rings_started(s, queues * 2); + uninit_virtio_dev(s); - /* End test */ - for (i = 0; i < queues * 2; i++) { - qvirtqueue_cleanup(dev->vdev.bus, &vq[i]->vq, alloc); - } - pc_alloc_uninit(alloc); - qvirtio_pci_device_disable(dev); - g_free(dev->pdev); - g_free(dev); - qpci_free_pc(bus); qtest_end(); test_server_free(s); @@ -919,10 +923,7 @@ static void test_multiqueue(void) int main(int argc, char **argv) { - QTestState *s = NULL; - TestServer *server = NULL; const char *hugefs; - char *qemu_cmd = NULL; int ret; char template[] = "/tmp/vhost-test-XXXXXX"; GMainLoop *loop; @@ -947,20 +948,11 @@ int main(int argc, char **argv) root = tmpfs; } - server = test_server_new("test"); - test_server_listen(server); - loop = g_main_loop_new(NULL, FALSE); /* run the main loop thread so the chardev may operate */ thread = g_thread_new(NULL, thread_function, loop); - qemu_cmd = GET_QEMU_CMD(server); - - s = qtest_start(qemu_cmd); - g_free(qemu_cmd); - init_virtio_dev(server); - - qtest_add_data_func("/vhost-user/read-guest-mem", server, read_guest_mem); + qtest_add_func("/vhost-user/read-guest-mem", test_read_guest_mem); qtest_add_func("/vhost-user/migrate", test_migrate); qtest_add_func("/vhost-user/multiqueue", test_multiqueue); @@ -978,12 +970,7 @@ int main(int argc, char **argv) ret = g_test_run(); - if (s) { - qtest_quit(s); - } - /* cleanup */ - test_server_free(server); /* finish the helper thread and dispatch pending sources */ g_main_loop_quit(loop); diff --git a/util/event_notifier-posix.c b/util/event_notifier-posix.c index 73c4046..6525666 100644 --- a/util/event_notifier-posix.c +++ b/util/event_notifier-posix.c @@ -29,6 +29,7 @@ void event_notifier_init_fd(EventNotifier *e, int fd) { e->rfd = fd; e->wfd = fd; + e->cleanup = NULL; } #endif @@ -65,6 +66,7 @@ int event_notifier_init(EventNotifier *e, int active) e->rfd = fds[0]; e->wfd = fds[1]; } + e->cleanup = NULL; if (active) { event_notifier_set(e); } @@ -80,10 +82,11 @@ void event_notifier_cleanup(EventNotifier *e) { if (e->rfd != e->wfd) { close(e->rfd); - e->rfd = -1; } close(e->wfd); + e->rfd = -1; e->wfd = -1; + e->cleanup = NULL; } int event_notifier_get_fd(const EventNotifier *e) diff --git a/util/event_notifier-win32.c b/util/event_notifier-win32.c index 62c53b0..eff8670 100644 --- a/util/event_notifier-win32.c +++ b/util/event_notifier-win32.c @@ -19,6 +19,7 @@ int event_notifier_init(EventNotifier *e, int active) { e->event = CreateEvent(NULL, TRUE, FALSE, NULL); assert(e->event); + e->cleanup = NULL; return 0; } @@ -26,6 +27,7 @@ void event_notifier_cleanup(EventNotifier *e) { CloseHandle(e->event); e->event = NULL; + e->cleanup = NULL; } HANDLE event_notifier_get_handle(EventNotifier *e) |