aboutsummaryrefslogtreecommitdiff
path: root/hw/vfio-user/device.c
diff options
context:
space:
mode:
Diffstat (limited to 'hw/vfio-user/device.c')
-rw-r--r--hw/vfio-user/device.c441
1 files changed, 441 insertions, 0 deletions
diff --git a/hw/vfio-user/device.c b/hw/vfio-user/device.c
new file mode 100644
index 0000000..0609a7d
--- /dev/null
+++ b/hw/vfio-user/device.c
@@ -0,0 +1,441 @@
+/*
+ * vfio protocol over a UNIX socket device handling.
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+#include "qemu/lockable.h"
+#include "qemu/thread.h"
+
+#include "hw/vfio-user/device.h"
+#include "hw/vfio-user/trace.h"
+
+/*
+ * Upper bounds on the region and IRQ counts accepted in a device info
+ * reply. These are to defend against a malicious server trying
+ * to force us to run out of memory.
+ */
+#define VFIO_USER_MAX_REGIONS 100
+#define VFIO_USER_MAX_IRQS 50
+
+/*
+ * Ask the server for the device description (VFIO_USER_DEVICE_GET_INFO).
+ *
+ * @proxy: connection the request is sent on
+ * @info:  receives the reply payload, i.e. everything that follows the
+ *         message header; it is written even when the reply is then
+ *         rejected by the sanity check below
+ * @errp:  set on failure
+ *
+ * Returns true on success. Returns false if the exchange failed, the
+ * server answered with an error, or the reply advertises more than
+ * VFIO_USER_MAX_REGIONS regions or VFIO_USER_MAX_IRQS IRQs.
+ */
+bool vfio_user_get_device_info(VFIOUserProxy *proxy,
+                               struct vfio_device_info *info, Error **errp)
+{
+    VFIOUserDeviceInfo msg;
+    uint32_t argsz = sizeof(msg) - sizeof(msg.hdr);
+
+    memset(&msg, 0, sizeof(msg));
+    vfio_user_request_msg(&msg.hdr, VFIO_USER_DEVICE_GET_INFO, sizeof(msg), 0);
+    msg.argsz = argsz;
+
+    if (!vfio_user_send_wait(proxy, &msg.hdr, NULL, 0, errp)) {
+        return false;
+    }
+
+    if (msg.hdr.flags & VFIO_USER_ERROR) {
+        /*
+         * error_reply is negated elsewhere in this file to build a
+         * negative-errno return, so it holds the positive code, which is
+         * what error_setg_errno() expects (as with EINVAL below).
+         * Negating it here would produce an "Unknown error" message.
+         */
+        error_setg_errno(errp, msg.hdr.error_reply,
+                         "VFIO_USER_DEVICE_GET_INFO failed");
+        return false;
+    }
+
+    trace_vfio_user_get_info(msg.num_regions, msg.num_irqs);
+
+    /* The payload starts at argsz, right after the header. */
+    memcpy(info, &msg.argsz, argsz);
+
+    /* defend against a malicious server */
+    if (info->num_regions > VFIO_USER_MAX_REGIONS ||
+        info->num_irqs > VFIO_USER_MAX_IRQS) {
+        error_setg_errno(errp, EINVAL, "invalid reply");
+        return false;
+    }
+
+    return true;
+}
+
+/*
+ * Send VFIO_USER_DEVICE_RESET to the server and wait for the reply.
+ *
+ * Nothing is returned to the caller: a failed exchange is reported through
+ * error_report_err() and an error reply is printed with error_printf().
+ */
+void vfio_user_device_reset(VFIOUserProxy *proxy)
+{
+    VFIOUserHdr hdr;
+    Error *err = NULL;
+
+    vfio_user_request_msg(&hdr, VFIO_USER_DEVICE_RESET, sizeof(hdr), 0);
+
+    if (vfio_user_send_wait(proxy, &hdr, NULL, 0, &err)) {
+        /* The exchange worked; the server may still have refused. */
+        if (hdr.flags & VFIO_USER_ERROR) {
+            error_printf("reset reply error %d\n", hdr.error_reply);
+        }
+        return;
+    }
+
+    error_prepend(&err, "%s: ", __func__);
+    error_report_err(err);
+}
+
+/*
+ * Fetch the description of one region (VFIO_USER_DEVICE_GET_REGION_INFO).
+ *
+ * @proxy: connection the request is sent on
+ * @info:  in: argsz and index select the reply size and the region;
+ *         out: overwritten with the first argsz bytes of the reply payload
+ * @fds:   optional descriptor set handed to vfio_user_send_wait(); it must
+ *         not ask for any FDs to be sent
+ *
+ * Returns 0 on success, -E2BIG if info->argsz is smaller than the fixed
+ * structure, -EINVAL if @fds wants to send FDs, -EFAULT if the exchange
+ * failed, or the negated error code from the server's reply.
+ */
+static int vfio_user_get_region_info(VFIOUserProxy *proxy,
+                                     struct vfio_region_info *info,
+                                     VFIOUserFDs *fds)
+{
+    g_autofree VFIOUserRegionInfo *msg = NULL;
+    Error *err = NULL;
+    uint32_t total;
+
+    /* data returned can be larger than vfio_region_info */
+    if (info->argsz < sizeof(*info)) {
+        error_printf("vfio_user_get_region_info argsz too small\n");
+        return -E2BIG;
+    }
+    if (fds != NULL && fds->send_fds != 0) {
+        error_printf("vfio_user_get_region_info can't send FDs\n");
+        return -EINVAL;
+    }
+
+    /* Buffer holds the header plus however much the caller asked for. */
+    total = sizeof(VFIOUserHdr) + info->argsz;
+    msg = g_malloc0(total);
+
+    /* Only the fixed part is sent; 'total' is passed to the wait call. */
+    vfio_user_request_msg(&msg->hdr, VFIO_USER_DEVICE_GET_REGION_INFO,
+                          sizeof(*msg), 0);
+    msg->argsz = info->argsz;
+    msg->index = info->index;
+
+    if (!vfio_user_send_wait(proxy, &msg->hdr, fds, total, &err)) {
+        error_prepend(&err, "%s: ", __func__);
+        error_report_err(err);
+        return -EFAULT;
+    }
+
+    if (msg->hdr.flags & VFIO_USER_ERROR) {
+        return -msg->hdr.error_reply;
+    }
+    trace_vfio_user_get_region_info(msg->index, msg->flags, msg->size);
+
+    /* The payload starts at argsz, right after the header. */
+    memcpy(info, &msg->argsz, info->argsz);
+
+    /*
+     * Once any region is mapped directly into the VM, sequential handling
+     * of vfio-user requests no longer guarantees that a posted write has
+     * completed before a later read. Posted writes are therefore turned
+     * off for the whole device (this is per-device, not per-region).
+     */
+    if (info->flags & VFIO_REGION_INFO_FLAG_MMAP) {
+        vfio_user_disable_posted_writes(proxy);
+    }
+
+    return 0;
+}
+
+/*
+ * io_ops get_region_info handler for socket-based devices.
+ *
+ * @vbasedev: device whose proxy is queried
+ * @info:     in: argsz and index; out: region description from the server
+ * @fd:       storage for at most one file descriptor received with the reply
+ *
+ * Returns 0 on success, -EINVAL if the index is out of range or the reply
+ * carries a capability offset outside [sizeof(*info), argsz], otherwise the
+ * error returned by vfio_user_get_region_info().
+ */
+static int vfio_user_device_io_get_region_info(VFIODevice *vbasedev,
+                                               struct vfio_region_info *info,
+                                               int *fd)
+{
+    /* Send no FDs, accept up to one in the reply. */
+    VFIOUserFDs fds = { .send_fds = 0, .recv_fds = 1, .fds = fd };
+    int ret;
+
+    /* Valid indexes are 0 .. num_regions - 1, so num_regions is rejected. */
+    if (info->index >= vbasedev->num_regions) {
+        return -EINVAL;
+    }
+
+    ret = vfio_user_get_region_info(vbasedev->proxy, info, &fds);
+    if (ret) {
+        return ret;
+    }
+
+    /* cap_offset in valid area */
+    if ((info->flags & VFIO_REGION_INFO_FLAG_CAPS) &&
+        (info->cap_offset < sizeof(*info) || info->cap_offset > info->argsz)) {
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+/*
+ * io_ops get_irq_info handler for socket-based devices.
+ *
+ * @vbasedev: device whose proxy is queried
+ * @info:     in: argsz and index of the IRQ; out: overwritten with the
+ *            reply payload (sizeof(*info) bytes)
+ *
+ * Returns 0 on success, -EFAULT if the exchange failed, or the negated
+ * error code from the server's reply.
+ */
+static int vfio_user_device_io_get_irq_info(VFIODevice *vbasedev,
+                                            struct vfio_irq_info *info)
+{
+    VFIOUserIRQInfo req;
+    Error *err = NULL;
+
+    memset(&req, 0, sizeof(req));
+    vfio_user_request_msg(&req.hdr, VFIO_USER_DEVICE_GET_IRQ_INFO,
+                          sizeof(req), 0);
+    req.argsz = info->argsz;
+    req.index = info->index;
+
+    if (!vfio_user_send_wait(vbasedev->proxy, &req.hdr, NULL, 0, &err)) {
+        error_prepend(&err, "%s: ", __func__);
+        error_report_err(err);
+        return -EFAULT;
+    }
+
+    if (req.hdr.flags & VFIO_USER_ERROR) {
+        return -req.hdr.error_reply;
+    }
+    trace_vfio_user_get_irq_info(req.index, req.flags, req.count);
+
+    /* The payload starts at argsz, right after the header. */
+    memcpy(info, &req.argsz, sizeof(*info));
+    return 0;
+}
+
+/*
+ * Length of the run of like entries starting at fdp[cur].
+ *
+ * An entry is either "valid" (not -1) or "invalid" (-1). The run is the
+ * stretch of consecutive entries of the same kind as fdp[cur], capped at
+ * @max entries. fdp[cur] itself is always counted, so the result is at
+ * least 1.
+ */
+static int irq_howmany(int *fdp, uint32_t cur, uint32_t max)
+{
+    const bool first_valid = fdp[cur] != -1;
+    int n = 1;
+
+    while (n < max && (fdp[cur + n] != -1) == first_valid) {
+        n++;
+    }
+
+    return n;
+}
+
+/*
+ * io_ops set_irqs handler for socket-based devices.
+ *
+ * @vbasedev: device whose proxy is used
+ * @irq:      IRQ set request; when VFIO_IRQ_SET_DATA_EVENTFD is set,
+ *            irq->data is read as an array of file descriptors and
+ *            irq->argsz is rewritten to sizeof(*irq)
+ *
+ * Without VFIO_IRQ_SET_DATA_EVENTFD a single message is sent. With it, the
+ * descriptors are sent in several messages, each covering a run of
+ * all-valid or all-invalid (-1) descriptors no longer than
+ * proxy->max_send_fds.
+ *
+ * Returns 0 on success, -EINVAL if irq->argsz is too small, -EFAULT if an
+ * exchange failed, or the negated error code from the server's reply.
+ */
+static int vfio_user_device_io_set_irqs(VFIODevice *vbasedev,
+                                        struct vfio_irq_set *irq)
+{
+    VFIOUserProxy *proxy = vbasedev->proxy;
+    g_autofree VFIOUserIRQSet *msg = NULL;
+    Error *err = NULL;
+    uint32_t total_fds, done, chunk, limit;
+    int *fd_array;
+
+    if (irq->argsz < sizeof(*irq)) {
+        error_printf("vfio_user_set_irqs argsz too small\n");
+        return -EINVAL;
+    }
+
+    /*
+     * Handle simple case
+     *
+     * NOTE(review): the buffer is sized for irq->argsz but only the fixed
+     * fields are filled in; irq->data is not copied here -- confirm that
+     * no caller relies on a non-eventfd data payload.
+     */
+    if (!(irq->flags & VFIO_IRQ_SET_DATA_EVENTFD)) {
+        uint32_t msg_size = sizeof(VFIOUserHdr) + irq->argsz;
+
+        msg = g_malloc0(msg_size);
+        vfio_user_request_msg(&msg->hdr, VFIO_USER_DEVICE_SET_IRQS,
+                              msg_size, 0);
+        msg->argsz = irq->argsz;
+        msg->flags = irq->flags;
+        msg->index = irq->index;
+        msg->start = irq->start;
+        msg->count = irq->count;
+        trace_vfio_user_set_irqs(msg->index, msg->start, msg->count,
+                                 msg->flags);
+
+        if (!vfio_user_send_wait(proxy, &msg->hdr, NULL, 0, &err)) {
+            error_prepend(&err, "%s: ", __func__);
+            error_report_err(err);
+            return -EFAULT;
+        }
+
+        if (msg->hdr.flags & VFIO_USER_ERROR) {
+            return -msg->hdr.error_reply;
+        }
+
+        return 0;
+    }
+
+    /*
+     * Calculate the number of FDs to send
+     * and adjust argsz
+     */
+    total_fds = (irq->argsz - sizeof(*irq)) / sizeof(int);
+    irq->argsz = sizeof(*irq);
+    fd_array = (int *)irq->data;
+    msg = g_malloc0(sizeof(*msg));
+
+    /*
+     * Send in chunks if over max_send_fds
+     */
+    done = 0;
+    while (done < total_fds) {
+        VFIOUserFDs chunk_fds;
+        VFIOUserFDs *arg_fds;
+
+        /* must send all valid FDs or all invalid FDs in single msg */
+        limit = total_fds - done;
+        if (limit > proxy->max_send_fds) {
+            limit = proxy->max_send_fds;
+        }
+        chunk = irq_howmany(fd_array, done, limit);
+
+        vfio_user_request_msg(&msg->hdr, VFIO_USER_DEVICE_SET_IRQS,
+                              sizeof(*msg), 0);
+        msg->argsz = irq->argsz;
+        msg->flags = irq->flags;
+        msg->index = irq->index;
+        msg->start = irq->start + done;
+        msg->count = chunk;
+        trace_vfio_user_set_irqs(msg->index, msg->start, msg->count,
+                                 msg->flags);
+
+        chunk_fds.send_fds = chunk;
+        chunk_fds.recv_fds = 0;
+        chunk_fds.fds = fd_array + done;
+        /* A run of -1 entries is sent with no descriptors attached. */
+        arg_fds = (chunk_fds.fds[0] != -1) ? &chunk_fds : NULL;
+
+        if (!vfio_user_send_wait(proxy, &msg->hdr, arg_fds, 0, &err)) {
+            error_prepend(&err, "%s: ", __func__);
+            error_report_err(err);
+            return -EFAULT;
+        }
+
+        if (msg->hdr.flags & VFIO_USER_ERROR) {
+            return -msg->hdr.error_reply;
+        }
+
+        done += chunk;
+    }
+
+    return 0;
+}
+
+/*
+ * io_ops region_read handler for socket-based devices.
+ *
+ * @vbasedev: device whose proxy is used
+ * @index:    region to read from
+ * @off:      offset within the region
+ * @count:    number of bytes requested
+ * @data:     destination buffer, at least @count bytes
+ *
+ * Returns the byte count reported in the reply on success, -EINVAL if
+ * @count exceeds proxy->max_xfer_size, -EFAULT if the exchange failed,
+ * -E2BIG if the reply claims more data than was requested, or the negated
+ * error code from the server's reply.
+ */
+static int vfio_user_device_io_region_read(VFIODevice *vbasedev, uint8_t index,
+                                           off_t off, uint32_t count,
+                                           void *data)
+{
+    VFIOUserProxy *proxy = vbasedev->proxy;
+    g_autofree VFIOUserRegionRW *msg = NULL;
+    Error *err = NULL;
+    int total;
+
+    if (count > proxy->max_xfer_size) {
+        return -EINVAL;
+    }
+
+    /* Room for the fixed message plus the data that comes back. */
+    total = sizeof(*msg) + count;
+    msg = g_malloc0(total);
+
+    /* Only the fixed part is sent; 'total' is passed to the wait call. */
+    vfio_user_request_msg(&msg->hdr, VFIO_USER_REGION_READ, sizeof(*msg), 0);
+    msg->offset = off;
+    msg->region = index;
+    msg->count = count;
+    trace_vfio_user_region_rw(msg->region, msg->offset, msg->count);
+
+    if (!vfio_user_send_wait(proxy, &msg->hdr, NULL, total, &err)) {
+        error_prepend(&err, "%s: ", __func__);
+        error_report_err(err);
+        return -EFAULT;
+    }
+
+    if (msg->hdr.flags & VFIO_USER_ERROR) {
+        return -msg->hdr.error_reply;
+    }
+
+    /* Never copy more than the caller's buffer can hold. */
+    if (msg->count > count) {
+        return -E2BIG;
+    }
+
+    memcpy(data, &msg->data, msg->count);
+    return msg->count;
+}
+
+/*
+ * io_ops region_write handler for socket-based devices.
+ *
+ * If this is a posted write, and VFIO_PROXY_NO_POST is not set, then we are OK
+ * to send the write to the socket without waiting for the server's reply:
+ * a subsequent read (of any region) will not pass the posted write, as all
+ * messages are handled sequentially.
+ *
+ * @vbasedev: device whose proxy is used
+ * @index: region to write to
+ * @off: offset within the region
+ * @count: number of bytes to write
+ * @data: source buffer holding @count bytes
+ * @post: caller's request for a posted write; ignored when the proxy has
+ *        VFIO_PROXY_NO_POST set
+ *
+ * Returns @count on success (including when the write was only queued
+ * into a WRITE_MULTI message or sent without waiting for a reply),
+ * -EINVAL if @count exceeds proxy->max_xfer_size, -EFAULT if sending
+ * failed, or the negated error code from the server's reply.
+ */
+static int vfio_user_device_io_region_write(VFIODevice *vbasedev, uint8_t index,
+ off_t off, unsigned count,
+ void *data, bool post)
+{
+ VFIOUserRegionRW *msgp = NULL;
+ VFIOUserProxy *proxy = vbasedev->proxy;
+ int size = sizeof(*msgp) + count;
+ Error *local_err = NULL;
+ bool can_multi;
+ int flags = 0;
+ int ret;
+
+ if (count > proxy->max_xfer_size) {
+ return -EINVAL;
+ }
+
+ /* the proxy can veto posted writes for the whole device */
+ if (proxy->flags & VFIO_PROXY_NO_POST) {
+ post = false;
+ }
+
+ /* a posted write tells the server not to reply */
+ if (post) {
+ flags |= VFIO_USER_NO_REPLY;
+ }
+
+ /* write eligible to be in a WRITE_MULTI msg ? */
+ can_multi = (proxy->flags & VFIO_PROXY_USE_MULTI) && post &&
+ count <= VFIO_USER_MULTI_DATA;
+
+ /*
+ * This should be a rare case, so first check without the lock,
+ * if we're wrong, vfio_send_queued() will flush any posted writes
+ * we missed here
+ */
+ if (proxy->wr_multi != NULL ||
+ (proxy->num_outgoing > VFIO_USER_OUT_HIGH && can_multi)) {
+
+ /*
+ * re-check with lock
+ *
+ * if already building a WRITE_MULTI msg,
+ * add this one if possible else flush pending before
+ * sending the current one
+ *
+ * else if outgoing queue is over the highwater,
+ * start a new WRITE_MULTI message
+ */
+ WITH_QEMU_LOCK_GUARD(&proxy->lock) {
+ if (proxy->wr_multi != NULL) {
+ if (can_multi) {
+ vfio_user_add_multi(proxy, index, off, count, data);
+ return count;
+ }
+ vfio_user_flush_multi(proxy);
+ } else if (proxy->num_outgoing > VFIO_USER_OUT_HIGH && can_multi) {
+ vfio_user_create_multi(proxy);
+ vfio_user_add_multi(proxy, index, off, count, data);
+ return count;
+ }
+ }
+ }
+
+ /* not batched: build a standalone REGION_WRITE carrying the data */
+ msgp = g_malloc0(size);
+ vfio_user_request_msg(&msgp->hdr, VFIO_USER_REGION_WRITE, size, flags);
+ msgp->offset = off;
+ msgp->region = index;
+ msgp->count = count;
+ memcpy(&msgp->data, data, count);
+ trace_vfio_user_region_rw(msgp->region, msgp->offset, msgp->count);
+
+ /*
+ * async send will free msg after it's sent
+ *
+ * NOTE(review): msgp is not freed here when the async send fails;
+ * presumably vfio_user_send_async() owns the buffer in that case
+ * too -- confirm against its implementation.
+ */
+ if (post) {
+ if (!vfio_user_send_async(proxy, &msgp->hdr, NULL, &local_err)) {
+ error_prepend(&local_err, "%s: ", __func__);
+ error_report_err(local_err);
+ return -EFAULT;
+ }
+
+ return count;
+ }
+
+ /* synchronous path: msgp stays ours and is freed on every exit below */
+ if (!vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0, &local_err)) {
+ error_prepend(&local_err, "%s: ", __func__);
+ error_report_err(local_err);
+ g_free(msgp);
+ return -EFAULT;
+ }
+
+ if (msgp->hdr.flags & VFIO_USER_ERROR) {
+ ret = -msgp->hdr.error_reply;
+ } else {
+ ret = count;
+ }
+
+ g_free(msgp);
+ return ret;
+}
+
+/*
+ * Device I/O operations implemented over the vfio-user socket.
+ */
+VFIODeviceIOOps vfio_user_device_io_ops_sock = {
+    .get_region_info = vfio_user_device_io_get_region_info,
+    .get_irq_info    = vfio_user_device_io_get_irq_info,
+    .set_irqs        = vfio_user_device_io_set_irqs,
+    .region_read     = vfio_user_device_io_region_read,
+    .region_write    = vfio_user_device_io_region_write,
+};