/*
 * vfio protocol over a UNIX socket device handling.
 *
 * Copyright © 2018, 2021 Oracle and/or its affiliates.
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/lockable.h"
#include "qemu/thread.h"

#include "hw/vfio-user/device.h"
#include "hw/vfio-user/trace.h"

/*
 * These are to defend against a malign server trying
 * to force us to run out of memory.
 */
#define VFIO_USER_MAX_REGIONS   100
#define VFIO_USER_MAX_IRQS      50

bool vfio_user_get_device_info(VFIOUserProxy *proxy,
                               struct vfio_device_info *info, Error **errp)
{
    VFIOUserDeviceInfo msg;
    uint32_t argsz = sizeof(msg) - sizeof(msg.hdr);

    memset(&msg, 0, sizeof(msg));
    vfio_user_request_msg(&msg.hdr, VFIO_USER_DEVICE_GET_INFO, sizeof(msg), 0);
    msg.argsz = argsz;

    if (!vfio_user_send_wait(proxy, &msg.hdr, NULL, 0, errp)) {
        return false;
    }

    if (msg.hdr.flags & VFIO_USER_ERROR) {
        error_setg_errno(errp, -msg.hdr.error_reply,
                         "VFIO_USER_DEVICE_GET_INFO failed");
        return false;
    }

    trace_vfio_user_get_info(msg.num_regions, msg.num_irqs);

    memcpy(info, &msg.argsz, argsz);

    /* defend against a malicious server */
    if (info->num_regions > VFIO_USER_MAX_REGIONS ||
        info->num_irqs > VFIO_USER_MAX_IRQS) {
        error_setg_errno(errp, EINVAL, "invalid reply");
        return false;
    }

    return true;
}

void vfio_user_device_reset(VFIOUserProxy *proxy)
{
    Error *local_err = NULL;
    VFIOUserHdr hdr;

    vfio_user_request_msg(&hdr, VFIO_USER_DEVICE_RESET, sizeof(hdr), 0);

    if (!vfio_user_send_wait(proxy, &hdr, NULL, 0, &local_err)) {
        error_prepend(&local_err, "%s: ", __func__);
        error_report_err(local_err);
        return;
    }

    if (hdr.flags & VFIO_USER_ERROR) {
        error_printf("reset reply error %d\n", hdr.error_reply);
    }
}
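
/*
 * Ask the server for information about a single device region.  The reply
 * may be accompanied by a file descriptor, returned through the optional
 * fds argument, for regions the client is allowed to map directly.
 */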
static int vfio_user_get_region_info(VFIOUserProxy *proxy,
                                     struct vfio_region_info *info,
                                     VFIOUserFDs *fds)
{
    g_autofree VFIOUserRegionInfo *msgp = NULL;
    Error *local_err = NULL;
    uint32_t size;

    /* data returned can be larger than vfio_region_info */
    if (info->argsz < sizeof(*info)) {
        error_printf("vfio_user_get_region_info argsz too small\n");
        return -E2BIG;
    }
    if (fds != NULL && fds->send_fds != 0) {
        error_printf("vfio_user_get_region_info can't send FDs\n");
        return -EINVAL;
    }

    size = info->argsz + sizeof(VFIOUserHdr);
    msgp = g_malloc0(size);

    vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_GET_REGION_INFO,
                          sizeof(*msgp), 0);
    msgp->argsz = info->argsz;
    msgp->index = info->index;

    if (!vfio_user_send_wait(proxy, &msgp->hdr, fds, size, &local_err)) {
        error_prepend(&local_err, "%s: ", __func__);
        error_report_err(local_err);
        return -EFAULT;
    }

    if (msgp->hdr.flags & VFIO_USER_ERROR) {
        return -msgp->hdr.error_reply;
    }
    trace_vfio_user_get_region_info(msgp->index, msgp->flags, msgp->size);

    memcpy(info, &msgp->argsz, info->argsz);

    /*
     * If at least one region is directly mapped into the VM, then we can no
     * longer rely on the sequential nature of vfio-user request handling to
     * ensure that posted writes are completed before a subsequent read. In
     * this case, disable posted write support. This is a per-device property,
     * not per-region.
     */
    if (info->flags & VFIO_REGION_INFO_FLAG_MMAP) {
        vfio_user_disable_posted_writes(proxy);
    }

    return 0;
}

static int vfio_user_device_io_get_region_info(VFIODevice *vbasedev,
                                               struct vfio_region_info *info,
                                               int *fd)
{
    VFIOUserFDs fds = { 0, 1, fd};
    int ret;

    if (info->index > vbasedev->num_regions) {
        return -EINVAL;
    }

    ret = vfio_user_get_region_info(vbasedev->proxy, info, &fds);
    if (ret) {
        return ret;
    }

    /* cap_offset in valid area */
    if ((info->flags & VFIO_REGION_INFO_FLAG_CAPS) &&
        (info->cap_offset < sizeof(*info) || info->cap_offset > info->argsz)) {
        return -EINVAL;
    }

    return 0;
}

static int vfio_user_device_io_get_irq_info(VFIODevice *vbasedev,
                                            struct vfio_irq_info *info)
{
    VFIOUserProxy *proxy = vbasedev->proxy;
    Error *local_err = NULL;
    VFIOUserIRQInfo msg;

    memset(&msg, 0, sizeof(msg));
    vfio_user_request_msg(&msg.hdr, VFIO_USER_DEVICE_GET_IRQ_INFO,
                          sizeof(msg), 0);
    msg.argsz = info->argsz;
    msg.index = info->index;

    if (!vfio_user_send_wait(proxy, &msg.hdr, NULL, 0, &local_err)) {
        error_prepend(&local_err, "%s: ", __func__);
        error_report_err(local_err);
        return -EFAULT;
    }

    if (msg.hdr.flags & VFIO_USER_ERROR) {
        return -msg.hdr.error_reply;
    }
    trace_vfio_user_get_irq_info(msg.index, msg.flags, msg.count);

    memcpy(info, &msg.argsz, sizeof(*info));
    return 0;
}
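
/*
 * Return the length of the run of consecutive FDs starting at fdp[cur] that
 * are either all valid (!= -1) or all invalid (== -1), capped at max.  The
 * SET_IRQS loop below uses this so each message carries only one kind.
 */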
static int irq_howmany(int *fdp, uint32_t cur, uint32_t max)
{
    int n = 0;

    if (fdp[cur] != -1) {
        do {
            n++;
        } while (n < max && fdp[cur + n] != -1);
    } else {
        do {
            n++;
        } while (n < max && fdp[cur + n] == -1);
    }

    return n;
}

static int vfio_user_device_io_set_irqs(VFIODevice *vbasedev,
                                        struct vfio_irq_set *irq)
{
    VFIOUserProxy *proxy = vbasedev->proxy;
    g_autofree VFIOUserIRQSet *msgp = NULL;
    uint32_t size, nfds, send_fds, sent_fds, max;
    Error *local_err = NULL;

    if (irq->argsz < sizeof(*irq)) {
        error_printf("vfio_user_set_irqs argsz too small\n");
        return -EINVAL;
    }

    /*
     * Handle simple case
     */
    if ((irq->flags & VFIO_IRQ_SET_DATA_EVENTFD) == 0) {
        size = sizeof(VFIOUserHdr) + irq->argsz;
        msgp = g_malloc0(size);

        vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_SET_IRQS, size, 0);
        msgp->argsz = irq->argsz;
        msgp->flags = irq->flags;
        msgp->index = irq->index;
        msgp->start = irq->start;
        msgp->count = irq->count;
        trace_vfio_user_set_irqs(msgp->index, msgp->start, msgp->count,
                                 msgp->flags);

        if (!vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, &local_err)) {
            error_prepend(&local_err, "%s: ", __func__);
            error_report_err(local_err);
            return -EFAULT;
        }

        if (msgp->hdr.flags & VFIO_USER_ERROR) {
            return -msgp->hdr.error_reply;
        }

        return 0;
    }

    /*
     * Calculate the number of FDs to send
     * and adjust argsz
     */
    nfds = (irq->argsz - sizeof(*irq)) / sizeof(int);
    irq->argsz = sizeof(*irq);
    msgp = g_malloc0(sizeof(*msgp));

    /*
     * Send in chunks if over max_send_fds
     */
    for (sent_fds = 0; nfds > sent_fds; sent_fds += send_fds) {
        VFIOUserFDs *arg_fds, loop_fds;

        /* must send all valid FDs or all invalid FDs in single msg */
        max = nfds - sent_fds;
        if (max > proxy->max_send_fds) {
            max = proxy->max_send_fds;
        }
        send_fds = irq_howmany((int *)irq->data, sent_fds, max);

        vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_SET_IRQS,
                              sizeof(*msgp), 0);
        msgp->argsz = irq->argsz;
        msgp->flags = irq->flags;
        msgp->index = irq->index;
        msgp->start = irq->start + sent_fds;
        msgp->count = send_fds;
        trace_vfio_user_set_irqs(msgp->index, msgp->start, msgp->count,
                                 msgp->flags);

        loop_fds.send_fds = send_fds;
        loop_fds.recv_fds = 0;
        loop_fds.fds = (int *)irq->data + sent_fds;
        arg_fds = loop_fds.fds[0] != -1 ? &loop_fds : NULL;

        if (!vfio_user_send_wait(proxy, &msgp->hdr, arg_fds, 0, &local_err)) {
            error_prepend(&local_err, "%s: ", __func__);
            error_report_err(local_err);
            return -EFAULT;
        }

        if (msgp->hdr.flags & VFIO_USER_ERROR) {
            return -msgp->hdr.error_reply;
        }
    }

    return 0;
}
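
/*
 * Read count bytes from a device region over the socket.  The data is
 * returned inline in the reply, so a single read is limited to the proxy's
 * max_xfer_size.
 */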
static int vfio_user_device_io_region_read(VFIODevice *vbasedev, uint8_t index,
                                           off_t off, uint32_t count,
                                           void *data)
{
    g_autofree VFIOUserRegionRW *msgp = NULL;
    VFIOUserProxy *proxy = vbasedev->proxy;
    int size = sizeof(*msgp) + count;
    Error *local_err = NULL;

    if (count > proxy->max_xfer_size) {
        return -EINVAL;
    }

    msgp = g_malloc0(size);
    vfio_user_request_msg(&msgp->hdr, VFIO_USER_REGION_READ, sizeof(*msgp), 0);
    msgp->offset = off;
    msgp->region = index;
    msgp->count = count;
    trace_vfio_user_region_rw(msgp->region, msgp->offset, msgp->count);

    if (!vfio_user_send_wait(proxy, &msgp->hdr, NULL, size, &local_err)) {
        error_prepend(&local_err, "%s: ", __func__);
        error_report_err(local_err);
        return -EFAULT;
    }

    if (msgp->hdr.flags & VFIO_USER_ERROR) {
        return -msgp->hdr.error_reply;
    } else if (msgp->count > count) {
        return -E2BIG;
    } else {
        memcpy(data, &msgp->data, msgp->count);
    }

    return msgp->count;
}

/*
 * If this is a posted write, and VFIO_PROXY_NO_POST is not set, then we are
 * OK to send the write to the socket without waiting for the server's reply:
 * a subsequent read (of any region) will not pass the posted write, as all
 * messages are handled sequentially.
 */
static int vfio_user_device_io_region_write(VFIODevice *vbasedev,
                                            uint8_t index, off_t off,
                                            unsigned count, void *data,
                                            bool post)
{
    VFIOUserRegionRW *msgp = NULL;
    VFIOUserProxy *proxy = vbasedev->proxy;
    int size = sizeof(*msgp) + count;
    Error *local_err = NULL;
    bool can_multi;
    int flags = 0;
    int ret;

    if (count > proxy->max_xfer_size) {
        return -EINVAL;
    }

    if (proxy->flags & VFIO_PROXY_NO_POST) {
        post = false;
    }

    if (post) {
        flags |= VFIO_USER_NO_REPLY;
    }

    /* write eligible to be in a WRITE_MULTI msg ? */
    can_multi = (proxy->flags & VFIO_PROXY_USE_MULTI) && post &&
        count <= VFIO_USER_MULTI_DATA;

    /*
     * This should be a rare case, so first check without the lock,
     * if we're wrong, vfio_send_queued() will flush any posted writes
     * we missed here
     */
    if (proxy->wr_multi != NULL ||
        (proxy->num_outgoing > VFIO_USER_OUT_HIGH && can_multi)) {

        /*
         * re-check with lock
         *
         * if already building a WRITE_MULTI msg,
         *  add this one if possible else flush pending before
         *  sending the current one
         *
         * else if outgoing queue is over the highwater,
         *  start a new WRITE_MULTI message
         */
        WITH_QEMU_LOCK_GUARD(&proxy->lock) {
            if (proxy->wr_multi != NULL) {
                if (can_multi) {
                    vfio_user_add_multi(proxy, index, off, count, data);
                    return count;
                }
                vfio_user_flush_multi(proxy);
            } else if (proxy->num_outgoing > VFIO_USER_OUT_HIGH && can_multi) {
                vfio_user_create_multi(proxy);
                vfio_user_add_multi(proxy, index, off, count, data);
                return count;
            }
        }
    }

    msgp = g_malloc0(size);
    vfio_user_request_msg(&msgp->hdr, VFIO_USER_REGION_WRITE, size, flags);
    msgp->offset = off;
    msgp->region = index;
    msgp->count = count;
    memcpy(&msgp->data, data, count);
    trace_vfio_user_region_rw(msgp->region, msgp->offset, msgp->count);

    /* async send will free msg after it's sent */
    if (post) {
        if (!vfio_user_send_async(proxy, &msgp->hdr, NULL, &local_err)) {
            error_prepend(&local_err, "%s: ", __func__);
            error_report_err(local_err);
            return -EFAULT;
        }

        return count;
    }

    if (!vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, &local_err)) {
        error_prepend(&local_err, "%s: ", __func__);
        error_report_err(local_err);
        g_free(msgp);
        return -EFAULT;
    }

    if (msgp->hdr.flags & VFIO_USER_ERROR) {
        ret = -msgp->hdr.error_reply;
    } else {
        ret = count;
    }

    g_free(msgp);
    return ret;
}

/*
 * Socket-based io_ops
 */
VFIODeviceIOOps vfio_user_device_io_ops_sock = {
    .get_region_info = vfio_user_device_io_get_region_info,
    .get_irq_info = vfio_user_device_io_get_irq_info,
    .set_irqs = vfio_user_device_io_set_irqs,
    .region_read = vfio_user_device_io_region_read,
    .region_write = vfio_user_device_io_region_write,
};