path: root/lib/libvfio-user.c
author    John Levon <john.levon@nutanix.com>  2020-11-27 14:48:07 +0000
committer GitHub <noreply@github.com>  2020-11-27 14:48:07 +0000
commit    e94bd44d10d8019ea2c39356363a5743136bdb5d (patch)
tree      93f71114f5e57682a5a5a1182f7c1e19ce963ff8 /lib/libvfio-user.c
parent    40ac852fec651f54a4be8905ab8bb6b25ddb64e2 (diff)
rename to libvfio-user (#128)

The muser name no longer reflects the implementation, and will just serve
to confuse. Bite the bullet now, and rename ourselves to reflect the actual
implementation.

Signed-off-by: John Levon <john.levon@nutanix.com>
Reviewed-by: Thanos Makatos <thanos.makatos@nutanix.com>
Reviewed-by: Swapnil Ingle <swapnil.ingle@nutanix.com>
Diffstat (limited to 'lib/libvfio-user.c')
-rw-r--r--  lib/libvfio-user.c  1647
1 file changed, 1647 insertions(+), 0 deletions(-)
diff --git a/lib/libvfio-user.c b/lib/libvfio-user.c
new file mode 100644
index 0000000..2676362
--- /dev/null
+++ b/lib/libvfio-user.c
@@ -0,0 +1,1647 @@
+/*
+ * Copyright (c) 2019 Nutanix Inc. All rights reserved.
+ *
+ * Authors: Thanos Makatos <thanos@nutanix.com>
+ * Swapnil Ingle <swapnil.ingle@nutanix.com>
+ * Felipe Franciosi <felipe@nutanix.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Nutanix nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <assert.h>
+#include <errno.h>
+#include <stddef.h>
+#include <sys/mman.h>
+#include <stdarg.h>
+#include <linux/vfio.h>
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+
+#include "cap.h"
+#include "dma.h"
+#include "libvfio-user.h"
+#include "private.h"
+#include "tran_sock.h"
+#include "migration.h"
+#include "irq.h"
+
+
+void
+vfu_log(vfu_ctx_t *vfu_ctx, vfu_log_lvl_t lvl, const char *fmt, ...)
+{
+ va_list ap;
+ char buf[BUFSIZ];
+ int _errno = errno;
+
+ assert(vfu_ctx != NULL);
+
+ if (vfu_ctx->log == NULL || lvl > vfu_ctx->log_lvl || fmt == NULL) {
+ return;
+ }
+
+ va_start(ap, fmt);
+ vsnprintf(buf, sizeof buf, fmt, ap);
+ va_end(ap);
+ vfu_ctx->log(vfu_ctx->pvt, lvl, buf);
+ errno = _errno;
+}
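+
+/*
+ * Example: a minimal log callback, sketched from the way the callback is
+ * invoked above ((pvt, level, message)); the exact vfu_log_fn_t typedef
+ * lives in the public header:
+ *
+ *     static void
+ *     log_cb(void *pvt, vfu_log_lvl_t lvl, const char *msg)
+ *     {
+ *         fprintf(stderr, "vfu[%d]: %s\n", lvl, msg);
+ *     }
+ *
+ * A device server registers it with vfu_setup_log(vfu_ctx, log_cb, VFU_DBG).
+ */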
+
+static inline int ERROR(int err)
+{
+ errno = err;
+ return -1;
+}
+
+static size_t
+get_vfio_caps_size(bool is_migr_reg, struct vfu_sparse_mmap_areas *m)
+{
+ size_t type_size = 0;
+ size_t sparse_size = 0;
+
+ if (is_migr_reg) {
+ type_size = sizeof(struct vfio_region_info_cap_type);
+ }
+
+ if (m != NULL) {
+ sparse_size = sizeof(struct vfio_region_info_cap_sparse_mmap)
+ + (m->nr_mmap_areas * sizeof(struct vfio_region_sparse_mmap_area));
+ }
+
+ return type_size + sparse_size;
+}
+
+/*
+ * Populate the sparse mmap capability information for the vfio client.
+ * The sparse mmap information is placed immediately after struct
+ * vfio_region_info, with cap_offset pointing to it.
+ */
+static void
+dev_get_caps(vfu_ctx_t *vfu_ctx, vfu_reg_info_t *vfu_reg, bool is_migr_reg,
+ struct vfio_region_info *vfio_reg)
+{
+ struct vfio_info_cap_header *header;
+ struct vfio_region_info_cap_type *type = NULL;
+ struct vfio_region_info_cap_sparse_mmap *sparse = NULL;
+ struct vfu_sparse_mmap_areas *mmap_areas;
+
+ assert(vfu_ctx != NULL);
+ assert(vfio_reg != NULL);
+
+ header = (struct vfio_info_cap_header*)(vfio_reg + 1);
+
+ if (is_migr_reg) {
+ type = (struct vfio_region_info_cap_type*)header;
+ type->header.id = VFIO_REGION_INFO_CAP_TYPE;
+ type->header.version = 1;
+ type->header.next = 0;
+ type->type = VFIO_REGION_TYPE_MIGRATION;
+ type->subtype = VFIO_REGION_SUBTYPE_MIGRATION;
+ vfio_reg->cap_offset = sizeof(struct vfio_region_info);
+ }
+
+ if (vfu_reg->mmap_areas != NULL) {
+ int i, nr_mmap_areas = vfu_reg->mmap_areas->nr_mmap_areas;
+ if (type != NULL) {
+ type->header.next = vfio_reg->cap_offset + sizeof(struct vfio_region_info_cap_type);
+ sparse = (struct vfio_region_info_cap_sparse_mmap*)(type + 1);
+ } else {
+ vfio_reg->cap_offset = sizeof(struct vfio_region_info);
+ sparse = (struct vfio_region_info_cap_sparse_mmap*)header;
+ }
+ sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
+ sparse->header.version = 1;
+ sparse->header.next = 0;
+ sparse->nr_areas = nr_mmap_areas;
+
+ mmap_areas = vfu_reg->mmap_areas;
+ for (i = 0; i < nr_mmap_areas; i++) {
+ sparse->areas[i].offset = mmap_areas->areas[i].start;
+ sparse->areas[i].size = mmap_areas->areas[i].size;
+ vfu_log(vfu_ctx, VFU_DBG, "%s: area %d %#llx-%#llx", __func__,
+ i, sparse->areas[i].offset,
+ sparse->areas[i].offset + sparse->areas[i].size);
+ }
+ }
+
+ /*
+ * FIXME VFIO_REGION_INFO_FLAG_MMAP is valid if the region is
+ * memory-mappable in general, not only if it supports sparse mmap.
+ */
+ vfio_reg->flags |= VFIO_REGION_INFO_FLAG_MMAP | VFIO_REGION_INFO_FLAG_CAPS;
+}
+
+#define VFU_REGION_SHIFT 40
+#define VFU_REGION_MASK ((1ULL << VFU_REGION_SHIFT) - 1)
+
+uint64_t
+region_to_offset(uint32_t region)
+{
+ return (uint64_t)region << VFU_REGION_SHIFT;
+}
+
+uint32_t
+offset_to_region(uint64_t offset)
+{
+ return (offset >> VFU_REGION_SHIFT) & VFU_REGION_MASK;
+}
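+
+/*
+ * Example: the encoding round-trips by construction. An access to region 1
+ * (BAR1) at byte 0x10 travels as offset (1ULL << 40) + 0x10:
+ *
+ *     uint64_t off = region_to_offset(1) + 0x10;
+ *     assert(offset_to_region(off) == 1);
+ *     assert(off - region_to_offset(offset_to_region(off)) == 0x10);
+ */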
+
+#ifdef VFU_VERBOSE_LOGGING
+void
+dump_buffer(const char *prefix, const char *buf, uint32_t count)
+{
+ int i;
+ const size_t bytes_per_line = 0x8;
+
+ if (strcmp(prefix, "")) {
+ fprintf(stderr, "%s\n", prefix);
+ }
+ for (i = 0; i < (int)count; i++) {
+ if (i % bytes_per_line != 0) {
+ fprintf(stderr, " ");
+ }
+ /* TODO valgrind emits a warning if count is 1 */
+ fprintf(stderr,"0x%02x", *(buf + i));
+ if ((i + 1) % bytes_per_line == 0) {
+ fprintf(stderr, "\n");
+ }
+ }
+ if (i % bytes_per_line != 0) {
+ fprintf(stderr, "\n");
+ }
+}
+#else
+#define dump_buffer(prefix, buf, count)
+#endif
+
+static bool
+is_migr_reg(vfu_ctx_t *vfu_ctx, int index)
+{
+ return &vfu_ctx->reg_info[index] == vfu_ctx->migr_reg;
+}
+
+static long
+dev_get_reginfo(vfu_ctx_t *vfu_ctx, uint32_t index,
+ struct vfio_region_info **vfio_reg)
+{
+ vfu_reg_info_t *vfu_reg;
+ size_t caps_size;
+ uint32_t argsz;
+
+ assert(vfu_ctx != NULL);
+ assert(vfio_reg != NULL);
+
+    if (index >= vfu_ctx->nr_regions) {
+        vfu_log(vfu_ctx, VFU_DBG, "bad region index %u", index);
+        return -EINVAL;
+    }
+
+    vfu_reg = &vfu_ctx->reg_info[index];
+
+ caps_size = get_vfio_caps_size(is_migr_reg(vfu_ctx, index),
+ vfu_reg->mmap_areas);
+ argsz = caps_size + sizeof(struct vfio_region_info);
+ *vfio_reg = calloc(1, argsz);
+ if (!*vfio_reg) {
+ return -ENOMEM;
+ }
+ /* FIXME document in the protocol that vfio_req->argsz is ignored */
+ (*vfio_reg)->argsz = argsz;
+ (*vfio_reg)->flags = vfu_reg->flags;
+ (*vfio_reg)->index = index;
+ (*vfio_reg)->offset = region_to_offset((*vfio_reg)->index);
+ (*vfio_reg)->size = vfu_reg->size;
+
+ if (caps_size > 0) {
+ dev_get_caps(vfu_ctx, vfu_reg, is_migr_reg(vfu_ctx, index), *vfio_reg);
+ }
+
+ vfu_log(vfu_ctx, VFU_DBG, "region_info[%d] offset %#llx flags %#x size %llu "
+ "argsz %u",
+ (*vfio_reg)->index, (*vfio_reg)->offset, (*vfio_reg)->flags,
+ (*vfio_reg)->size, (*vfio_reg)->argsz);
+
+ return 0;
+}
+
+int
+vfu_get_region(loff_t pos, size_t count, loff_t *off)
+{
+ int r;
+
+ assert(off != NULL);
+
+ r = offset_to_region(pos);
+    if ((int)offset_to_region(pos + count - 1) != r) {
+ return -ENOENT;
+ }
+ *off = pos - region_to_offset(r);
+
+ return r;
+}
+
+static uint32_t
+region_size(vfu_ctx_t *vfu_ctx, int region)
+{
+ assert(region >= VFU_PCI_DEV_BAR0_REGION_IDX && region <= VFU_PCI_DEV_VGA_REGION_IDX);
+ return vfu_ctx->reg_info[region].size;
+}
+
+static uint32_t
+pci_config_space_size(vfu_ctx_t *vfu_ctx)
+{
+ return region_size(vfu_ctx, VFU_PCI_DEV_CFG_REGION_IDX);
+}
+
+static ssize_t
+handle_pci_config_space_access(vfu_ctx_t *vfu_ctx, char *buf, size_t count,
+ loff_t pos, bool is_write)
+{
+ int ret;
+
+ count = MIN(pci_config_space_size(vfu_ctx), count);
+ if (is_write) {
+ ret = cap_maybe_access(vfu_ctx, vfu_ctx->caps, buf, count, pos);
+ if (ret < 0) {
+ vfu_log(vfu_ctx, VFU_ERR, "bad access to capabilities %#lx-%#lx\n",
+ pos, pos + count);
+ return ret;
+ }
+ } else {
+ memcpy(buf, vfu_ctx->pci_config_space->raw + pos, count);
+ }
+ return count;
+}
+
+static ssize_t
+do_access(vfu_ctx_t *vfu_ctx, char *buf, uint8_t count, uint64_t pos, bool is_write)
+{
+ int idx;
+ loff_t offset;
+
+ assert(vfu_ctx != NULL);
+ assert(buf != NULL);
+ assert(count == 1 || count == 2 || count == 4 || count == 8);
+
+ idx = vfu_get_region(pos, count, &offset);
+    if (idx < 0 || idx >= (int)vfu_ctx->nr_regions) {
+        vfu_log(vfu_ctx, VFU_ERR, "invalid region %d", idx);
+        return idx < 0 ? idx : -EINVAL;
+    }
+
+ if (idx == VFU_PCI_DEV_CFG_REGION_IDX) {
+ return handle_pci_config_space_access(vfu_ctx, buf, count, offset,
+ is_write);
+ }
+
+ if (is_migr_reg(vfu_ctx, idx)) {
+ if (offset + count > vfu_ctx->reg_info[idx].size) {
+ vfu_log(vfu_ctx, VFU_ERR, "read %#lx-%#lx past end of migration region (%#x)",
+ offset, offset + count - 1,
+ vfu_ctx->reg_info[idx].size);
+ return -EINVAL;
+ }
+ return handle_migration_region_access(vfu_ctx, vfu_ctx->pvt,
+ vfu_ctx->migration,
+ buf, count, offset, is_write);
+ }
+
+    /*
+     * Checking whether a callback exists might sound expensive, but this
+     * code is not performance critical. This works well when we don't
+     * expect a region to be used, since the user of the library can simply
+     * leave the callback NULL in vfu_create_ctx.
+     */
+ if (vfu_ctx->reg_info[idx].fn != NULL) {
+ return vfu_ctx->reg_info[idx].fn(vfu_ctx->pvt, buf, count, offset,
+ is_write);
+ }
+
+ vfu_log(vfu_ctx, VFU_ERR, "no callback for region %d", idx);
+
+ return -EINVAL;
+}
+
+/*
+ * Returns the number of bytes processed on success or a negative number on
+ * error.
+ *
+ * TODO function naming, general cleanup of access path
+ * FIXME we must be able to return values up to uint32_t bit, or negative on
+ * error. Better to make return value an int and return the number of bytes
+ * processed via an argument.
+ */
+static ssize_t
+_vfu_access(vfu_ctx_t *vfu_ctx, char *buf, uint32_t count, uint64_t *ppos,
+ bool is_write)
+{
+ uint32_t done = 0;
+ int ret;
+
+ assert(vfu_ctx != NULL);
+ /* buf and ppos can be NULL if count is 0 */
+
+ while (count) {
+ size_t size;
+        /*
+         * Limit accesses to qword and enforce alignment. TODO figure out
+         * whether the PCI spec actually requires this.
+         * FIXME while this makes sense for registers, we might be able to
+         * relax this requirement and make some transfers more efficient.
+         * Maybe make this a per-region option that can be set by the user?
+         */
+ if (count >= 8 && !(*ppos % 8)) {
+ size = 8;
+ } else if (count >= 4 && !(*ppos % 4)) {
+ size = 4;
+ } else if (count >= 2 && !(*ppos % 2)) {
+ size = 2;
+ } else {
+ size = 1;
+ }
+ ret = do_access(vfu_ctx, buf, size, *ppos, is_write);
+ if (ret <= 0) {
+ vfu_log(vfu_ctx, VFU_ERR, "failed to %s %#lx-%#lx: %s",
+ is_write ? "write to" : "read from", *ppos, *ppos + size - 1,
+ strerror(-ret));
+            /*
+             * TODO if ret < 0 then it might contain a legitimate error
+             * code; why replace it with EFAULT?
+             */
+ return -EFAULT;
+ }
+        if (ret != (int)size) {
+            vfu_log(vfu_ctx, VFU_DBG, "short %s: %d != %zu",
+                    is_write ? "write" : "read", ret, size);
+        }
+ count -= size;
+ done += size;
+ *ppos += size;
+ buf += size;
+ }
+ return done;
+}
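+
+/*
+ * Example: the splitting above turns a 6-byte read at an 8-byte-aligned
+ * offset into two naturally-aligned accesses rather than one unaligned one:
+ *
+ *     count=6, *ppos=0x10  ->  do_access(size=4, pos=0x10)
+ *                              do_access(size=2, pos=0x14)
+ */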
+
+static inline int
+vfu_access(vfu_ctx_t *vfu_ctx, bool is_write, char *rwbuf, uint32_t count,
+ uint64_t *pos)
+{
+ uint32_t processed = 0, _count;
+ int ret;
+
+ assert(vfu_ctx != NULL);
+ assert(rwbuf != NULL);
+ assert(pos != NULL);
+
+ vfu_log(vfu_ctx, VFU_DBG, "%s %#lx-%#lx", is_write ? "W" : "R", *pos,
+ *pos + count - 1);
+
+#ifdef VFU_VERBOSE_LOGGING
+ if (is_write) {
+ dump_buffer("buffer write", rwbuf, count);
+ }
+#endif
+
+ _count = count;
+ ret = vfu_pci_hdr_access(vfu_ctx, &_count, pos, is_write, rwbuf);
+ if (ret != 0) {
+ /* FIXME shouldn't we fail here? */
+ vfu_log(vfu_ctx, VFU_ERR, "failed to access PCI header: %s",
+ strerror(-ret));
+#ifdef VFU_VERBOSE_LOGGING
+ dump_buffer("buffer write", rwbuf, _count);
+#endif
+ }
+
+ /*
+ * count is how much has been processed by vfu_pci_hdr_access,
+ * _count is how much there's left to be processed by vfu_access
+ */
+ processed = count - _count;
+ ret = _vfu_access(vfu_ctx, rwbuf + processed, _count, pos, is_write);
+ if (ret >= 0) {
+ ret += processed;
+#ifdef VFU_VERBOSE_LOGGING
+    if (!is_write && ret == (int)count) {
+ dump_buffer("buffer read", rwbuf, ret);
+ }
+#endif
+ }
+
+ return ret;
+}
+
+/* TODO merge with dev_get_reginfo */
+static int
+handle_device_get_region_info(vfu_ctx_t *vfu_ctx, uint32_t size,
+ struct vfio_region_info *reg_info_in,
+ struct vfio_region_info **reg_info_out)
+{
+ if (size != sizeof(*reg_info_in) || size != reg_info_in->argsz) {
+ return -EINVAL;
+ }
+
+ return dev_get_reginfo(vfu_ctx, reg_info_in->index, reg_info_out);
+}
+
+static int
+handle_device_get_info(vfu_ctx_t *vfu_ctx, uint32_t size,
+ struct vfio_device_info *dev_info)
+{
+ assert(vfu_ctx != NULL);
+ assert(dev_info != NULL);
+
+ if (size != sizeof *dev_info) {
+ return -EINVAL;
+ }
+
+ dev_info->argsz = sizeof *dev_info;
+ dev_info->flags = VFIO_DEVICE_FLAGS_PCI | VFIO_DEVICE_FLAGS_RESET;
+ dev_info->num_regions = vfu_ctx->nr_regions;
+ dev_info->num_irqs = VFU_DEV_NUM_IRQS;
+
+ vfu_log(vfu_ctx, VFU_DBG, "sent devinfo flags %#x, num_regions %d, num_irqs"
+ " %d", dev_info->flags, dev_info->num_regions, dev_info->num_irqs);
+
+ return 0;
+}
+
+/*
+ * Handles a DMA map/unmap request.
+ *
+ * @vfu_ctx: vfu context
+ * @size: size, in bytes, of the memory pointed to by @dma_regions
+ * @map: whether this is a DMA map operation
+ * @fds: array of file descriptors; its length must equal the number of DMA
+ *       regions, irrespective of whether @nr_fds is 0
+ * @nr_fds: size of the above array. It must be either 0 or exactly match
+ *          the number of DMA regions in @dma_regions.
+ * @dma_regions: memory that contains the DMA regions to be mapped/unmapped
+ *
+ * @returns 0 on success, -errno on failure.
+ */
+int
+handle_dma_map_or_unmap(vfu_ctx_t *vfu_ctx, uint32_t size, bool map,
+ int *fds, int nr_fds,
+ struct vfio_user_dma_region *dma_regions)
+{
+ int nr_dma_regions;
+ int ret, i, fdi;
+
+ assert(vfu_ctx != NULL);
+ assert(fds != NULL);
+
+ if (vfu_ctx->dma == NULL) {
+ return 0;
+ }
+
+ if (size % sizeof(struct vfio_user_dma_region) != 0) {
+ vfu_log(vfu_ctx, VFU_ERR, "bad size of DMA regions %d", size);
+ return -EINVAL;
+ }
+
+ nr_dma_regions = (int)(size / sizeof(struct vfio_user_dma_region));
+
+ for (i = 0, fdi = 0; i < nr_dma_regions; i++) {
+ if (map) {
+ int fd = -1;
+            if (dma_regions[i].flags & VFIO_USER_F_DMA_REGION_MAPPABLE) {
+ if (fdi == nr_fds) {
+ return -EINVAL;
+ }
+ fd = fds[fdi++];
+ }
+
+ ret = dma_controller_add_region(vfu_ctx->dma,
+ dma_regions[i].addr,
+ dma_regions[i].size,
+ fd,
+ dma_regions[i].offset);
+ if (ret < 0) {
+ vfu_log(vfu_ctx, VFU_INF,
+ "failed to add DMA region %#lx-%#lx offset=%#lx fd=%d: %s",
+ dma_regions[i].addr,
+ dma_regions[i].addr + dma_regions[i].size - 1,
+ dma_regions[i].offset, fd,
+ strerror(-ret));
+ } else {
+ vfu_log(vfu_ctx, VFU_DBG,
+ "added DMA region %#lx-%#lx offset=%#lx fd=%d",
+ dma_regions[i].addr,
+ dma_regions[i].addr + dma_regions[i].size - 1,
+ dma_regions[i].offset, fd);
+ }
+ } else {
+ ret = dma_controller_remove_region(vfu_ctx->dma,
+ dma_regions[i].addr,
+ dma_regions[i].size,
+ vfu_ctx->unmap_dma, vfu_ctx->pvt);
+ if (ret < 0) {
+ vfu_log(vfu_ctx, VFU_INF,
+ "failed to remove DMA region %#lx-%#lx: %s",
+ dma_regions[i].addr,
+ dma_regions[i].addr + dma_regions[i].size - 1,
+ strerror(-ret));
+ } else {
+ vfu_log(vfu_ctx, VFU_DBG,
+ "removed DMA region %#lx-%#lx",
+ dma_regions[i].addr,
+ dma_regions[i].addr + dma_regions[i].size - 1);
+ }
+ }
+ if (ret < 0) {
+ return ret;
+ }
+        if (map && vfu_ctx->map_dma != NULL) {
+ vfu_ctx->map_dma(vfu_ctx->pvt, dma_regions[i].addr,
+ dma_regions[i].size);
+ }
+ }
+ return 0;
+}
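+
+/*
+ * Example: to map a single 2MB region backed by a shared file descriptor, a
+ * client would send one entry shaped like the below (a sketch of the wire
+ * payload only; the guest address and fd transport are illustrative):
+ *
+ *     struct vfio_user_dma_region r = {
+ *         .addr   = 0x40000000,   // guest physical address
+ *         .size   = 0x200000,
+ *         .offset = 0,            // offset into the backing fd
+ *         .flags  = VFIO_USER_F_DMA_REGION_MAPPABLE,
+ *     };
+ *
+ * with the fd itself passed as ancillary data; handle_dma_map_or_unmap()
+ * then consumes one entry of @fds for it.
+ */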
+
+static int
+handle_device_reset(vfu_ctx_t *vfu_ctx)
+{
+ vfu_log(vfu_ctx, VFU_DBG, "Device reset called by client");
+ if (vfu_ctx->reset != NULL) {
+ return vfu_ctx->reset(vfu_ctx->pvt);
+ }
+ return 0;
+}
+
+static int
+validate_region_access(vfu_ctx_t *vfu_ctx, uint32_t size, uint16_t cmd,
+ struct vfio_user_region_access *region_access)
+{
+ assert(region_access != NULL);
+
+ if (size < sizeof *region_access) {
+ vfu_log(vfu_ctx, VFU_ERR, "message size too small (%d)", size);
+ return -EINVAL;
+ }
+
+    if (region_access->region >= vfu_ctx->nr_regions || region_access->count <= 0) {
+ vfu_log(vfu_ctx, VFU_ERR, "bad region %d and/or count %d",
+ region_access->region, region_access->count);
+ return -EINVAL;
+ }
+
+ if (device_is_stopped_and_copying(vfu_ctx->migration) &&
+ !is_migr_reg(vfu_ctx, region_access->region)) {
+ vfu_log(vfu_ctx, VFU_ERR,
+ "cannot access region %d while device in stop-and-copy state",
+ region_access->region);
+ return -EINVAL;
+ }
+
+ if (cmd == VFIO_USER_REGION_WRITE &&
+ size - sizeof *region_access != region_access->count)
+ {
+ vfu_log(vfu_ctx, VFU_ERR, "bad region access, expected %lu, actual %d",
+ size - sizeof *region_access, region_access->count);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int
+handle_region_access(vfu_ctx_t *vfu_ctx, uint32_t size, uint16_t cmd,
+ void **data, size_t *len,
+ struct vfio_user_region_access *region_access)
+{
+ uint64_t count, offset;
+ int ret;
+ char *buf;
+
+ assert(vfu_ctx != NULL);
+ assert(data != NULL);
+ assert(region_access != NULL);
+
+ ret = validate_region_access(vfu_ctx, size, cmd, region_access);
+ if (ret < 0) {
+ return ret;
+ }
+
+ *len = sizeof *region_access;
+ if (cmd == VFIO_USER_REGION_READ) {
+ *len += region_access->count;
+ }
+ *data = malloc(*len);
+ if (*data == NULL) {
+ return -ENOMEM;
+ }
+ if (cmd == VFIO_USER_REGION_READ) {
+ buf = (char*)(((struct vfio_user_region_access*)(*data)) + 1);
+ } else {
+ buf = (char*)(region_access + 1);
+ }
+
+ count = region_access->count;
+ offset = region_to_offset(region_access->region) + region_access->offset;
+
+ ret = vfu_access(vfu_ctx, cmd == VFIO_USER_REGION_WRITE, buf, count, &offset);
+ if (ret != (int)region_access->count) {
+ vfu_log(vfu_ctx, VFU_ERR, "failed to %s %#x-%#lx: %d",
+ cmd == VFIO_USER_REGION_WRITE ? "write" : "read",
+ region_access->count,
+ region_access->offset + region_access->count - 1, ret);
+ /* FIXME we should return whatever has been accessed, not an error */
+ if (ret >= 0) {
+ ret = -EINVAL;
+ }
+ return ret;
+ }
+
+ region_access = *data;
+ region_access->count = ret;
+
+ return 0;
+}
+
+static int
+handle_dirty_pages_get(vfu_ctx_t *vfu_ctx,
+ struct iovec **iovecs, size_t *nr_iovecs,
+ struct vfio_iommu_type1_dirty_bitmap_get *ranges,
+ uint32_t size)
+{
+ int ret = -EINVAL;
+ size_t i;
+
+ assert(vfu_ctx != NULL);
+ assert(iovecs != NULL);
+ assert(nr_iovecs != NULL);
+ assert(ranges != NULL);
+
+ if (size % sizeof(struct vfio_iommu_type1_dirty_bitmap_get) != 0) {
+ return -EINVAL;
+ }
+ *nr_iovecs = 1 + size / sizeof(struct vfio_iommu_type1_dirty_bitmap_get);
+ *iovecs = malloc(*nr_iovecs * sizeof(struct iovec));
+ if (*iovecs == NULL) {
+ return -ENOMEM;
+ }
+
+ for (i = 1; i < *nr_iovecs; i++) {
+ struct vfio_iommu_type1_dirty_bitmap_get *r = &ranges[(i - 1)]; /* FIXME ugly indexing */
+ ret = dma_controller_dirty_page_get(vfu_ctx->dma, r->iova, r->size,
+ r->bitmap.pgsize, r->bitmap.size,
+ (char**)&((*iovecs)[i].iov_base));
+ if (ret != 0) {
+ goto out;
+ }
+ (*iovecs)[i].iov_len = r->bitmap.size;
+ }
+out:
+ if (ret != 0) {
+ if (*iovecs != NULL) {
+ free(*iovecs);
+ *iovecs = NULL;
+ }
+ }
+ return ret;
+}
+
+static int
+handle_dirty_pages(vfu_ctx_t *vfu_ctx, uint32_t size,
+ struct iovec **iovecs, size_t *nr_iovecs,
+ struct vfio_iommu_type1_dirty_bitmap *dirty_bitmap)
+{
+ int ret;
+
+ assert(vfu_ctx != NULL);
+ assert(iovecs != NULL);
+ assert(nr_iovecs != NULL);
+ assert(dirty_bitmap != NULL);
+
+ if (size < sizeof *dirty_bitmap || size != dirty_bitmap->argsz) {
+ vfu_log(vfu_ctx, VFU_ERR, "invalid header size %u", size);
+ return -EINVAL;
+ }
+
+ if (dirty_bitmap->flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_START) {
+ ret = dma_controller_dirty_page_logging_start(vfu_ctx->dma,
+ migration_get_pgsize(vfu_ctx->migration));
+ } else if (dirty_bitmap->flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP) {
+ ret = dma_controller_dirty_page_logging_stop(vfu_ctx->dma);
+ } else if (dirty_bitmap->flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP) {
+ ret = handle_dirty_pages_get(vfu_ctx, iovecs, nr_iovecs,
+ (struct vfio_iommu_type1_dirty_bitmap_get*)(dirty_bitmap + 1),
+ size - sizeof *dirty_bitmap);
+ } else {
+ vfu_log(vfu_ctx, VFU_ERR, "bad flags %#x", dirty_bitmap->flags);
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+/*
+ * FIXME return value is messed up, sometimes we return -1 and set errno while
+ * other times we return -errno. Fix.
+ */
+
+/*
+ * Returns 0 if the header is valid, -errno otherwise.
+ */
+static int
+validate_header(vfu_ctx_t *vfu_ctx, struct vfio_user_header *hdr, size_t size)
+{
+ assert(hdr != NULL);
+
+    if (size < sizeof *hdr) {
+        vfu_log(vfu_ctx, VFU_ERR, "short header read %zu", size);
+ return -EINVAL;
+ }
+
+ if (hdr->flags.type != VFIO_USER_F_TYPE_COMMAND) {
+ vfu_log(vfu_ctx, VFU_ERR, "header not a request");
+ return -EINVAL;
+ }
+
+    if (hdr->msg_size < sizeof *hdr) {
+        vfu_log(vfu_ctx, VFU_ERR, "bad size in header %u", hdr->msg_size);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/*
+ * Populates @hdr to contain the header for the next command to be processed.
+ * Stores any passed FDs into @fds and the number in @nr_fds.
+ *
+ * Returns 0 if there is no command to process, -errno if an error occurred,
+ * or the number of bytes read.
+ */
+static int
+get_next_command(vfu_ctx_t *vfu_ctx, struct vfio_user_header *hdr, int *fds,
+ int *nr_fds)
+{
+ int ret;
+
+ /* FIXME get request shouldn't set errno, it should return it as -errno */
+ ret = vfu_ctx->trans->get_request(vfu_ctx, hdr, fds, nr_fds);
+ if (unlikely(ret < 0)) {
+ if (ret == -EAGAIN || ret == -EWOULDBLOCK) {
+ return 0;
+ }
+ if (ret != -EINTR) {
+ vfu_log(vfu_ctx, VFU_ERR, "failed to receive request: %s",
+ strerror(-ret));
+ }
+ return ret;
+ }
+ if (unlikely(ret == 0)) {
+ if (errno == EINTR) {
+ return -EINTR;
+ }
+ if (errno == 0) {
+ vfu_log(vfu_ctx, VFU_INF, "vfio-user client closed connection");
+ } else {
+ vfu_log(vfu_ctx, VFU_ERR, "end of file: %m");
+ }
+ return -ENOTCONN;
+ }
+ return ret;
+}
+
+static int
+process_request(vfu_ctx_t *vfu_ctx)
+{
+ struct vfio_user_header hdr = { 0, };
+ int ret;
+ int *fds = NULL;
+ int nr_fds;
+ struct vfio_irq_info irq_info;
+ struct vfio_device_info dev_info;
+ struct vfio_region_info *dev_reg_info = NULL;
+ struct iovec _iovecs[2] = { { 0, } };
+ struct iovec *iovecs = NULL;
+ size_t nr_iovecs = 0;
+ bool free_iovec_data = true;
+ void *cmd_data = NULL;
+
+ assert(vfu_ctx != NULL);
+
+ if (device_is_stopped(vfu_ctx->migration)) {
+ return -ESHUTDOWN;
+ }
+
+ /*
+ * FIXME if migration device state is VFIO_DEVICE_STATE_STOP then only
+ * migration-related operations should execute. However, some operations
+ * are harmless (e.g. get region info). At the minimum we should fail
+ * accesses to device regions other than the migration region. I'd expect
+ * DMA unmap and get dirty pages to be required even in the stop-and-copy
+ * state.
+ */
+
+ nr_fds = vfu_ctx->client_max_fds;
+ fds = alloca(nr_fds * sizeof(int));
+
+ ret = get_next_command(vfu_ctx, &hdr, fds, &nr_fds);
+ if (ret <= 0) {
+ return ret;
+ }
+
+ ret = validate_header(vfu_ctx, &hdr, ret);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /*
+ * TODO from now on if an error occurs we still need to reply. Move this
+ * code into a separate function so that we don't have to use goto.
+ */
+
+ hdr.msg_size -= sizeof(hdr);
+ if (hdr.msg_size > 0) {
+ cmd_data = malloc(hdr.msg_size);
+ if (cmd_data == NULL) {
+ ret = -ENOMEM;
+ goto reply;
+ }
+ // FIXME: should be transport op
+ ret = recv(vfu_ctx->conn_fd, cmd_data, hdr.msg_size, 0);
+ if (ret < 0) {
+ ret = -errno;
+ goto reply;
+ }
+ if (ret != (int)hdr.msg_size) {
+ vfu_log(vfu_ctx, VFU_ERR, "short read, expected=%d, actual=%d",
+ hdr.msg_size, ret);
+ ret = -EINVAL;
+ goto reply;
+ }
+ }
+
+ if (device_is_stopped_and_copying(vfu_ctx->migration)
+ && !(hdr.cmd == VFIO_USER_REGION_READ || hdr.cmd == VFIO_USER_REGION_WRITE)) {
+ vfu_log(vfu_ctx, VFU_ERR,
+ "bad command %d while device in stop-and-copy state", hdr.cmd);
+ ret = -EINVAL;
+ goto reply;
+ }
+
+ switch (hdr.cmd) {
+ case VFIO_USER_DMA_MAP:
+ case VFIO_USER_DMA_UNMAP:
+ ret = handle_dma_map_or_unmap(vfu_ctx, hdr.msg_size,
+ hdr.cmd == VFIO_USER_DMA_MAP,
+ fds, nr_fds, cmd_data);
+ break;
+ case VFIO_USER_DEVICE_GET_INFO:
+ ret = handle_device_get_info(vfu_ctx, hdr.msg_size, &dev_info);
+ if (ret >= 0) {
+ _iovecs[1].iov_base = &dev_info;
+ _iovecs[1].iov_len = dev_info.argsz;
+ iovecs = _iovecs;
+ nr_iovecs = 2;
+ }
+ break;
+ case VFIO_USER_DEVICE_GET_REGION_INFO:
+ ret = handle_device_get_region_info(vfu_ctx, hdr.msg_size, cmd_data,
+ &dev_reg_info);
+ if (ret == 0) {
+ _iovecs[1].iov_base = dev_reg_info;
+ _iovecs[1].iov_len = dev_reg_info->argsz;
+ iovecs = _iovecs;
+ nr_iovecs = 2;
+ }
+ break;
+ case VFIO_USER_DEVICE_GET_IRQ_INFO:
+ ret = handle_device_get_irq_info(vfu_ctx, hdr.msg_size, cmd_data,
+ &irq_info);
+ if (ret == 0) {
+ _iovecs[1].iov_base = &irq_info;
+ _iovecs[1].iov_len = sizeof irq_info;
+ iovecs = _iovecs;
+ nr_iovecs = 2;
+ }
+ break;
+ case VFIO_USER_DEVICE_SET_IRQS:
+ ret = handle_device_set_irqs(vfu_ctx, hdr.msg_size, fds, nr_fds,
+ cmd_data);
+ break;
+ case VFIO_USER_REGION_READ:
+ case VFIO_USER_REGION_WRITE:
+ iovecs = _iovecs;
+ ret = handle_region_access(vfu_ctx, hdr.msg_size, hdr.cmd,
+ &iovecs[1].iov_base, &iovecs[1].iov_len,
+ cmd_data);
+ nr_iovecs = 2;
+ break;
+ case VFIO_USER_DEVICE_RESET:
+ ret = handle_device_reset(vfu_ctx);
+ break;
+ case VFIO_USER_DIRTY_PAGES:
+ // FIXME: don't allow migration calls if migration == NULL
+ ret = handle_dirty_pages(vfu_ctx, hdr.msg_size, &iovecs, &nr_iovecs,
+ cmd_data);
+ if (ret >= 0) {
+ free_iovec_data = false;
+ }
+ break;
+ default:
+ vfu_log(vfu_ctx, VFU_ERR, "bad command %d", hdr.cmd);
+ ret = -EINVAL;
+ goto reply;
+ }
+
+reply:
+    /*
+     * TODO: in case of an error during command handling, set errno
+     * accordingly in the reply message.
+     */
+ if (ret < 0) {
+ vfu_log(vfu_ctx, VFU_ERR, "failed to handle command %d: %s", hdr.cmd,
+ strerror(-ret));
+ } else {
+ ret = 0;
+ }
+
+ // FIXME: SPEC: should the reply include the command? I'd say yes?
+ ret = vfu_send_iovec(vfu_ctx->conn_fd, hdr.msg_id, true,
+ 0, iovecs, nr_iovecs, NULL, 0, -ret);
+ if (unlikely(ret < 0)) {
+ vfu_log(vfu_ctx, VFU_ERR, "failed to complete command: %s",
+ strerror(-ret));
+ }
+ if (iovecs != NULL && iovecs != _iovecs) {
+ if (free_iovec_data) {
+ size_t i;
+ for (i = 0; i < nr_iovecs; i++) {
+ free(iovecs[i].iov_base);
+ }
+ }
+ free(iovecs);
+ }
+ free(cmd_data);
+
+ return ret;
+}
+
+static int prepare_ctx(vfu_ctx_t *vfu_ctx)
+{
+ vfu_reg_info_t *cfg_reg;
+ const vfu_reg_info_t zero_reg = { 0 };
+ int err;
+ uint32_t max_ivs = 0, i;
+ size_t size;
+
+ if (vfu_ctx->ready != 0) {
+ return 0;
+ }
+
+    /*
+     * With LIBVFIO_USER_FLAG_ATTACH_NB, the caller is always expected to
+     * call vfu_ctx_try_attach().
+     */
+ if ((vfu_ctx->flags & LIBVFIO_USER_FLAG_ATTACH_NB) == 0) {
+ vfu_ctx->conn_fd = vfu_ctx->trans->attach(vfu_ctx);
+ if (vfu_ctx->conn_fd < 0) {
+ err = vfu_ctx->conn_fd;
+            if (err != -EINTR) {
+ vfu_log(vfu_ctx, VFU_ERR, "failed to attach: %s",
+ strerror(-err));
+ }
+ return err;
+ }
+ }
+
+ cfg_reg = &vfu_ctx->reg_info[VFU_PCI_DEV_CFG_REGION_IDX];
+
+ // Set a default config region if none provided.
+ /* TODO should it be enough to check that the size of region is 0? */
+ if (memcmp(cfg_reg, &zero_reg, sizeof(*cfg_reg)) == 0) {
+ cfg_reg->flags = VFU_REG_FLAG_RW;
+ cfg_reg->size = PCI_CFG_SPACE_SIZE;
+ }
+
+    // This may have been allocated by vfu_pci_setup_config_hdr().
+ if (vfu_ctx->pci_config_space == NULL) {
+ vfu_ctx->pci_config_space = calloc(1, cfg_reg->size);
+ if (vfu_ctx->pci_config_space == NULL) {
+ return -ENOMEM;
+ }
+ }
+
+ // Set type for region registers.
+ for (i = 0; i < PCI_BARS_NR; i++) {
+ if (!(vfu_ctx->reg_info[i].flags & VFU_REG_FLAG_MEM)) {
+ vfu_ctx->pci_config_space->hdr.bars[i].io.region_type |= 0x1;
+ }
+ }
+
+ if (vfu_ctx->irqs == NULL) {
+ /*
+ * FIXME need to check that the number of MSI and MSI-X IRQs are valid
+ * (1, 2, 4, 8, 16 or 32 for MSI and up to 2048 for MSI-X).
+ */
+
+ // Work out highest count of irq vectors.
+ for (i = 0; i < VFU_DEV_NUM_IRQS; i++) {
+ if (max_ivs < vfu_ctx->irq_count[i]) {
+ max_ivs = vfu_ctx->irq_count[i];
+ }
+ }
+
+ //FIXME: assert(max_ivs > 0)?
+ size = sizeof(int) * max_ivs;
+ vfu_ctx->irqs = calloc(1, sizeof(vfu_irqs_t) + size);
+ if (vfu_ctx->irqs == NULL) {
+            // vfu_ctx->pci_config_space will be freed by vfu_ctx_destroy().
+ return -ENOMEM;
+ }
+
+ // Set context irq information.
+ for (i = 0; i < max_ivs; i++) {
+ vfu_ctx->irqs->efds[i] = -1;
+ }
+ vfu_ctx->irqs->err_efd = -1;
+ vfu_ctx->irqs->req_efd = -1;
+ vfu_ctx->irqs->type = IRQ_NONE;
+ vfu_ctx->irqs->max_ivs = max_ivs;
+
+ // Reflect on the config space whether INTX is available.
+ if (vfu_ctx->irq_count[VFU_DEV_INTX_IRQ] != 0) {
+ vfu_ctx->pci_config_space->hdr.intr.ipin = 1; // INTA#
+ }
+ }
+
+ if (vfu_ctx->caps != NULL) {
+ vfu_ctx->pci_config_space->hdr.sts.cl = 0x1;
+ vfu_ctx->pci_config_space->hdr.cap = PCI_STD_HEADER_SIZEOF;
+ }
+ vfu_ctx->ready = 1;
+
+ return 0;
+}
+
+int
+vfu_ctx_drive(vfu_ctx_t *vfu_ctx)
+{
+ int err;
+
+ if (vfu_ctx == NULL) {
+ return ERROR(EINVAL);
+ }
+
+ err = prepare_ctx(vfu_ctx);
+ if (err < 0) {
+ return ERROR(-err);
+ }
+
+ do {
+ err = process_request(vfu_ctx);
+ } while (err >= 0);
+
+ return err;
+}
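+
+/*
+ * Example: a minimal blocking server loop, sketched with an illustrative
+ * socket path:
+ *
+ *     vfu_ctx_t *ctx = vfu_create_ctx(VFU_TRANS_SOCK, "/tmp/vfu.sock", 0, NULL);
+ *     if (ctx == NULL) {
+ *         err(EXIT_FAILURE, "vfu_create_ctx");
+ *     }
+ *     // ... vfu_pci_setup_config_hdr()/vfu_setup_region()/... calls here ...
+ *     int ret = vfu_ctx_drive(ctx);  // blocks; returns -errno, e.g.
+ *                                    // -ENOTCONN when the client disconnects
+ *     vfu_ctx_destroy(ctx);
+ */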
+
+int
+vfu_ctx_poll(vfu_ctx_t *vfu_ctx)
+{
+ int err;
+
+ if (unlikely((vfu_ctx->flags & LIBVFIO_USER_FLAG_ATTACH_NB) == 0)) {
+ return -ENOTSUP;
+ }
+
+ assert(vfu_ctx->ready == 1);
+ err = process_request(vfu_ctx);
+
+ return err >= 0 ? 0 : err;
+}
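+
+/*
+ * Example: non-blocking operation, a sketch assuming the caller multiplexes
+ * the transport fds with poll/epoll out of band:
+ *
+ *     vfu_ctx_t *ctx = vfu_create_ctx(VFU_TRANS_SOCK, path,
+ *                                     LIBVFIO_USER_FLAG_ATTACH_NB, NULL);
+ *     while (vfu_ctx_try_attach(ctx) < 0) {
+ *         if (errno != EAGAIN && errno != EWOULDBLOCK) {
+ *             err(EXIT_FAILURE, "attach");
+ *         }
+ *         // wait for the listening socket to become readable
+ *     }
+ *     for (;;) {
+ *         // wait for the connection to become readable, then:
+ *         if (vfu_ctx_poll(ctx) < 0) {
+ *             break;
+ *         }
+ *     }
+ */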
+
+/* FIXME this is not enough anymore ? */
+void *
+vfu_mmap(vfu_ctx_t *vfu_ctx, off_t offset, size_t length)
+{
+ if ((vfu_ctx == NULL) || (length == 0) || !PAGE_ALIGNED(offset)) {
+ if (vfu_ctx != NULL) {
+ vfu_log(vfu_ctx, VFU_DBG, "bad device mmap region %#lx-%#lx\n",
+ offset, offset + length);
+ }
+ errno = EINVAL;
+ return MAP_FAILED;
+ }
+
+ return mmap(NULL, length, PROT_READ | PROT_WRITE, MAP_SHARED,
+ vfu_ctx->fd, offset);
+}
+
+static void
+free_sparse_mmap_areas(vfu_ctx_t *vfu_ctx)
+{
+ int i;
+
+ assert(vfu_ctx != NULL);
+
+ for (i = 0; i < (int)vfu_ctx->nr_regions; i++) {
+ free(vfu_ctx->reg_info[i].mmap_areas);
+ }
+}
+
+void
+vfu_ctx_destroy(vfu_ctx_t *vfu_ctx)
+{
+
+ if (vfu_ctx == NULL) {
+ return;
+ }
+
+ free(vfu_ctx->uuid);
+ free(vfu_ctx->pci_config_space);
+ if (vfu_ctx->trans->detach != NULL) {
+ vfu_ctx->trans->detach(vfu_ctx);
+ }
+ if (vfu_ctx->dma != NULL) {
+ dma_controller_destroy(vfu_ctx->dma);
+ }
+ free_sparse_mmap_areas(vfu_ctx);
+ free(vfu_ctx->reg_info);
+ free(vfu_ctx->caps);
+ free(vfu_ctx->migration);
+ free(vfu_ctx->irqs);
+ free(vfu_ctx);
+ // FIXME: Maybe close any open irq efds? Unmap stuff?
+}
+
+struct vfu_sparse_mmap_areas*
+copy_sparse_mmap_area(struct vfu_sparse_mmap_areas *src)
+{
+ struct vfu_sparse_mmap_areas *dest;
+ size_t size;
+
+ assert(src != NULL);
+
+ size = sizeof(*dest) + (src->nr_mmap_areas * sizeof(struct vfu_mmap_area));
+ dest = calloc(1, size);
+ if (dest != NULL) {
+ memcpy(dest, src, size);
+ }
+ return dest;
+}
+
+int
+vfu_ctx_try_attach(vfu_ctx_t *vfu_ctx)
+{
+ int err;
+
+ assert(vfu_ctx != NULL);
+
+ if ((vfu_ctx->flags & LIBVFIO_USER_FLAG_ATTACH_NB) == 0) {
+ return ERROR(EINVAL);
+ }
+
+ err = prepare_ctx(vfu_ctx);
+ if (err < 0) {
+ return ERROR(-err);
+ }
+
+ return vfu_ctx->trans->attach(vfu_ctx);
+}
+
+vfu_ctx_t *vfu_create_ctx(vfu_trans_t trans, const char *path, int flags,
+ void *pvt)
+{
+ vfu_ctx_t *vfu_ctx = NULL;
+ int err = 0;
+
+ if (trans != VFU_TRANS_SOCK) {
+ errno = ENOTSUP;
+ return NULL;
+ }
+
+ vfu_ctx = calloc(1, sizeof(vfu_ctx_t));
+ if (vfu_ctx == NULL) {
+ return NULL;
+ }
+ vfu_ctx->trans = &sock_transport_ops;
+
+ //FIXME: Validate arguments.
+ // Set other context data.
+ vfu_ctx->pvt = pvt;
+ vfu_ctx->flags = flags;
+ vfu_ctx->log_lvl = VFU_ERR;
+
+ vfu_ctx->uuid = strdup(path);
+ if (vfu_ctx->uuid == NULL) {
+        err = -errno;
+ goto out;
+ }
+
+    /*
+     * FIXME: we currently always allocate a migration region. Check whether
+     * it's better to separate the migration region from the standard regions
+     * in vfu_ctx.reg_info and move it into vfu_ctx.migration.
+     */
+ vfu_ctx->nr_regions = VFU_PCI_DEV_NUM_REGIONS + 1;
+ vfu_ctx->reg_info = calloc(vfu_ctx->nr_regions, sizeof *vfu_ctx->reg_info);
+ if (vfu_ctx->reg_info == NULL) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ if (vfu_ctx->trans->init != NULL) {
+ err = vfu_ctx->trans->init(vfu_ctx);
+ if (err < 0) {
+ goto out;
+ }
+ vfu_ctx->fd = err;
+ }
+ err = 0;
+
+out:
+ if (err != 0) {
+ if (vfu_ctx != NULL) {
+ vfu_ctx_destroy(vfu_ctx);
+ vfu_ctx = NULL;
+ }
+ errno = -err;
+ }
+
+ return vfu_ctx;
+}
+
+int vfu_setup_log(vfu_ctx_t *vfu_ctx, vfu_log_fn_t *log, vfu_log_lvl_t log_lvl)
+{
+
+ if (log_lvl != VFU_ERR && log_lvl != VFU_INF && log_lvl != VFU_DBG) {
+ return ERROR(EINVAL);
+ }
+
+ vfu_ctx->log = log;
+ vfu_ctx->log_lvl = log_lvl;
+
+ return 0;
+}
+
+int vfu_pci_setup_config_hdr(vfu_ctx_t *vfu_ctx, vfu_pci_hdr_id_t id,
+ vfu_pci_hdr_ss_t ss, vfu_pci_hdr_cc_t cc,
+ UNUSED bool extended)
+{
+ vfu_pci_config_space_t *config_space;
+
+ assert(vfu_ctx != NULL);
+
+ if (vfu_ctx->pci_config_space != NULL) {
+ vfu_log(vfu_ctx, VFU_ERR, "pci header already setup");
+ return ERROR(EEXIST);
+ }
+
+    /* TODO: support extended PCI config space. */
+
+ // Allocate a buffer for the config space.
+ config_space = calloc(1, PCI_CFG_SPACE_SIZE);
+ if (config_space == NULL) {
+ return ERROR(ENOMEM);
+ }
+
+ config_space->hdr.id = id;
+ config_space->hdr.ss = ss;
+ config_space->hdr.cc = cc;
+ vfu_ctx->pci_config_space = config_space;
+
+ return 0;
+}
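+
+/*
+ * Example: describing a hypothetical device. The union field names (.vid,
+ * .did, .sid, .bcc) are assumptions about the public header types, used
+ * here purely for illustration:
+ *
+ *     vfu_pci_setup_config_hdr(ctx,
+ *                              (vfu_pci_hdr_id_t){ .vid = 0x1af4, .did = 0x0001 },
+ *                              (vfu_pci_hdr_ss_t){ .vid = 0x1af4, .sid = 0x0001 },
+ *                              (vfu_pci_hdr_cc_t){ .bcc = 0x01 },  // mass storage
+ *                              false);
+ */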
+
+int vfu_pci_setup_caps(vfu_ctx_t *vfu_ctx, vfu_cap_t **caps, int nr_caps)
+{
+ int ret;
+
+ assert(vfu_ctx != NULL);
+
+ if (vfu_ctx->caps != NULL) {
+ vfu_log(vfu_ctx, VFU_ERR, "capabilities are already setup");
+ return ERROR(EEXIST);
+ }
+
+ if (caps == NULL || nr_caps == 0) {
+ vfu_log(vfu_ctx, VFU_ERR, "Invalid args passed");
+ return ERROR(EINVAL);
+ }
+
+ vfu_ctx->caps = caps_create(vfu_ctx, caps, nr_caps, &ret);
+ if (vfu_ctx->caps == NULL) {
+ vfu_log(vfu_ctx, VFU_ERR, "failed to create PCI capabilities: %s",
+ strerror(ret));
+ return ERROR(ret);
+ }
+
+ return 0;
+}
+
+static int
+copy_sparse_mmap_areas(vfu_reg_info_t *reg_info,
+ struct vfu_sparse_mmap_areas *mmap_areas)
+{
+ int nr_mmap_areas;
+ size_t size;
+
+ if (mmap_areas == NULL) {
+ return 0;
+ }
+
+ nr_mmap_areas = mmap_areas->nr_mmap_areas;
+ size = sizeof(*mmap_areas) + (nr_mmap_areas * sizeof(struct vfu_mmap_area));
+ reg_info->mmap_areas = calloc(1, size);
+ if (reg_info->mmap_areas == NULL) {
+ return -ENOMEM;
+ }
+
+ memcpy(reg_info->mmap_areas, mmap_areas, size);
+
+ return 0;
+}
+
+static inline bool is_valid_pci_config_space_region(int flags, size_t size)
+{
+ return flags == VFU_REG_FLAG_RW && (size == PCI_CFG_SPACE_SIZE
+ || size == PCI_CFG_SPACE_EXP_SIZE);
+}
+
+int vfu_setup_region(vfu_ctx_t *vfu_ctx, int region_idx, size_t size,
+ vfu_region_access_cb_t *region_access, int flags,
+ struct vfu_sparse_mmap_areas *mmap_areas,
+ vfu_map_region_cb_t *map)
+{
+ int ret;
+
+ assert(vfu_ctx != NULL);
+
+ switch(region_idx) {
+ case VFU_PCI_DEV_BAR0_REGION_IDX ... VFU_PCI_DEV_VGA_REGION_IDX:
+ // Validate the config region provided.
+ if (region_idx == VFU_PCI_DEV_CFG_REGION_IDX &&
+ !is_valid_pci_config_space_region(flags, size)) {
+ return ERROR(EINVAL);
+ }
+
+ vfu_ctx->reg_info[region_idx].flags = flags;
+ vfu_ctx->reg_info[region_idx].size = size;
+ vfu_ctx->reg_info[region_idx].fn = region_access;
+
+ if (map != NULL) {
+ vfu_ctx->reg_info[region_idx].map = map;
+ }
+ if (mmap_areas) {
+ ret = copy_sparse_mmap_areas(&vfu_ctx->reg_info[region_idx],
+ mmap_areas);
+ if (ret < 0) {
+ return ERROR(-ret);
+ }
+ }
+ break;
+ default:
+ vfu_log(vfu_ctx, VFU_ERR, "Invalid region index %d", region_idx);
+ return ERROR(EINVAL);
+ }
+
+ return 0;
+}
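+
+/*
+ * Example: registering a 4KB BAR0 with an access callback. The callback
+ * shape mirrors how reg_info[].fn is invoked in do_access() above, i.e.
+ * (pvt, buf, count, offset, is_write) returning the byte count:
+ *
+ *     static ssize_t
+ *     bar0_access(void *pvt, char *buf, size_t count, loff_t offset,
+ *                 bool is_write)
+ *     {
+ *         if (!is_write) {
+ *             memset(buf, 0, count);  // e.g. a read-as-zero register file
+ *         }
+ *         return count;
+ *     }
+ *
+ *     vfu_setup_region(ctx, VFU_PCI_DEV_BAR0_REGION_IDX, 0x1000, bar0_access,
+ *                      VFU_REG_FLAG_RW | VFU_REG_FLAG_MEM, NULL, NULL);
+ */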
+
+int vfu_setup_device_reset_cb(vfu_ctx_t *vfu_ctx, vfu_reset_cb_t *reset)
+{
+
+ assert(vfu_ctx != NULL);
+ vfu_ctx->reset = reset;
+
+ return 0;
+}
+
+int vfu_setup_device_dma_cb(vfu_ctx_t *vfu_ctx, vfu_map_dma_cb_t *map_dma,
+ vfu_unmap_dma_cb_t *unmap_dma)
+{
+
+ assert(vfu_ctx != NULL);
+
+ vfu_ctx->map_dma = map_dma;
+ vfu_ctx->unmap_dma = unmap_dma;
+
+ // Create the internal DMA controller.
+ if (vfu_ctx->unmap_dma != NULL) {
+ vfu_ctx->dma = dma_controller_create(vfu_ctx, VFU_DMA_REGIONS);
+ if (vfu_ctx->dma == NULL) {
+ return ERROR(ENOMEM);
+ }
+ }
+
+ return 0;
+}
+
+int vfu_setup_device_nr_irqs(vfu_ctx_t *vfu_ctx, enum vfu_dev_irq_type type,
+ uint32_t count)
+{
+
+ assert(vfu_ctx != NULL);
+
+ if (type < VFU_DEV_INTX_IRQ || type > VFU_DEV_REQ_IRQ) {
+ vfu_log(vfu_ctx, VFU_ERR, "Invalid IRQ index %d, should be between "
+ "(%d to %d)", type, VFU_DEV_INTX_IRQ,
+ VFU_DEV_REQ_IRQ);
+ return ERROR(EINVAL);
+ }
+
+ vfu_ctx->irq_count[type] = count;
+
+ return 0;
+}
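+
+/*
+ * Example: a device exposing one INTx line and 32 MSI-X vectors (assuming
+ * the MSI-X index is named VFU_DEV_MSIX_IRQ in the public header):
+ *
+ *     vfu_setup_device_nr_irqs(ctx, VFU_DEV_INTX_IRQ, 1);
+ *     vfu_setup_device_nr_irqs(ctx, VFU_DEV_MSIX_IRQ, 32);
+ */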
+
+int vfu_setup_device_migration(vfu_ctx_t *vfu_ctx, vfu_migration_t *migration)
+{
+ vfu_reg_info_t *migr_reg;
+ int ret = 0;
+
+ assert(vfu_ctx != NULL);
+
+ //FIXME: Validate args.
+
+ if (vfu_ctx->migr_reg != NULL) {
+ vfu_log(vfu_ctx, VFU_ERR, "device migration is already setup");
+ return ERROR(EEXIST);
+ }
+
+ /* FIXME hacky, find a more robust way to allocate a region index */
+ migr_reg = &vfu_ctx->reg_info[(vfu_ctx->nr_regions - 1)];
+
+    /* FIXME: if there are sparse mmap areas, set the region flags accordingly */
+ ret = copy_sparse_mmap_areas(migr_reg, migration->mmap_areas);
+ if (ret < 0) {
+ return ERROR(-ret);
+ }
+
+ migr_reg->flags = VFU_REG_FLAG_RW;
+ migr_reg->size = sizeof(struct vfio_device_migration_info) + migration->size;
+
+ vfu_ctx->migration = init_migration(migration, &ret);
+ if (vfu_ctx->migration == NULL) {
+ vfu_log(vfu_ctx, VFU_ERR, "failed to initialize device migration");
+ free(migr_reg->mmap_areas);
+ return ERROR(ret);
+ }
+ vfu_ctx->migr_reg = migr_reg;
+
+ return 0;
+}
+
+/*
+ * Returns a pointer to the standard part of the PCI configuration space.
+ */
+inline vfu_pci_config_space_t *
+vfu_pci_get_config_space(vfu_ctx_t *vfu_ctx)
+{
+ assert(vfu_ctx != NULL);
+ return vfu_ctx->pci_config_space;
+}
+
+/*
+ * Returns a pointer to the non-standard part of the PCI configuration space.
+ */
+inline uint8_t *
+vfu_get_pci_non_std_config_space(vfu_ctx_t *vfu_ctx)
+{
+ assert(vfu_ctx != NULL);
+ return (uint8_t *)&vfu_ctx->pci_config_space->non_std;
+}
+
+inline vfu_reg_info_t *
+vfu_get_region_info(vfu_ctx_t *vfu_ctx)
+{
+ assert(vfu_ctx != NULL);
+ return vfu_ctx->reg_info;
+}
+
+inline int
+vfu_addr_to_sg(vfu_ctx_t *vfu_ctx, dma_addr_t dma_addr,
+ uint32_t len, dma_sg_t *sg, int max_sg, int prot)
+{
+ assert(vfu_ctx != NULL);
+
+ if (unlikely(vfu_ctx->unmap_dma == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+ return dma_addr_to_sg(vfu_ctx->dma, dma_addr, len, sg, max_sg, prot);
+}
+
+inline int
+vfu_map_sg(vfu_ctx_t *vfu_ctx, const dma_sg_t *sg,
+ struct iovec *iov, int cnt)
+{
+ if (unlikely(vfu_ctx->unmap_dma == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+ return dma_map_sg(vfu_ctx->dma, sg, iov, cnt);
+}
+
+inline void
+vfu_unmap_sg(vfu_ctx_t *vfu_ctx, const dma_sg_t *sg, struct iovec *iov, int cnt)
+{
+ if (unlikely(vfu_ctx->unmap_dma == NULL)) {
+ return;
+ }
+ return dma_unmap_sg(vfu_ctx->dma, sg, iov, cnt);
+}
+
+uint8_t *
+vfu_ctx_get_cap(vfu_ctx_t *vfu_ctx, uint8_t id)
+{
+ assert(vfu_ctx != NULL);
+
+ return cap_find_by_id(vfu_ctx, id);
+}
+
+int
+vfu_dma_read(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, void *data)
+{
+ struct vfio_user_dma_region_access *dma_recv;
+ struct vfio_user_dma_region_access dma_send;
+ int recv_size;
+ int msg_id = 1, ret;
+
+ assert(vfu_ctx != NULL);
+ assert(sg != NULL);
+
+ recv_size = sizeof(*dma_recv) + sg->length;
+
+ dma_recv = calloc(recv_size, 1);
+ if (dma_recv == NULL) {
+ return -ENOMEM;
+ }
+
+ dma_send.addr = sg->dma_addr;
+ dma_send.count = sg->length;
+ ret = vfu_msg(vfu_ctx->conn_fd, msg_id, VFIO_USER_DMA_READ,
+ &dma_send, sizeof dma_send, NULL,
+ dma_recv, recv_size);
+ memcpy(data, dma_recv->data, sg->length); /* FIXME no need for memcpy */
+ free(dma_recv);
+
+ return ret;
+}
+
+int
+vfu_dma_write(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, void *data)
+{
+ struct vfio_user_dma_region_access *dma_send, dma_recv;
+ int send_size = sizeof(*dma_send) + sg->length;
+ int msg_id = 1, ret;
+
+ assert(vfu_ctx != NULL);
+ assert(sg != NULL);
+
+ dma_send = calloc(send_size, 1);
+ if (dma_send == NULL) {
+ return -ENOMEM;
+ }
+ dma_send->addr = sg->dma_addr;
+ dma_send->count = sg->length;
+ memcpy(dma_send->data, data, sg->length); /* FIXME no need to copy! */
+ ret = vfu_msg(vfu_ctx->conn_fd, msg_id, VFIO_USER_DMA_WRITE,
+ dma_send, send_size, NULL,
+ &dma_recv, sizeof(dma_recv));
+ free(dma_send);
+
+ return ret;
+}
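+
+/*
+ * Example: reading 512 bytes of guest memory at IOVA 0x1000 through the
+ * client, sketched with a single-entry SG list and assuming vfu_addr_to_sg()
+ * returns the number of entries filled:
+ *
+ *     dma_sg_t sg;
+ *     char buf[512];
+ *
+ *     if (vfu_addr_to_sg(ctx, 0x1000, sizeof(buf), &sg, 1, PROT_READ) == 1) {
+ *         vfu_dma_read(ctx, &sg, buf);
+ *     }
+ */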
+
+/* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */