aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/libvfio-user.h76
-rw-r--r--lib/cap.c4
-rw-r--r--lib/libvfio-user.c501
-rw-r--r--lib/migration.c19
-rw-r--r--lib/migration.h5
-rw-r--r--lib/pci.c148
-rw-r--r--lib/pci.h11
-rw-r--r--lib/private.h29
-rw-r--r--test/unit-tests.c44
9 files changed, 367 insertions, 470 deletions
diff --git a/include/libvfio-user.h b/include/libvfio-user.h
index 61f94ca..d74087e 100644
--- a/include/libvfio-user.h
+++ b/include/libvfio-user.h
@@ -200,37 +200,18 @@ typedef ssize_t (vfu_region_access_cb_t)(vfu_ctx_t *vfu_ctx, char *buf,
#define VFU_REGION_FLAG_READ (1 << 0)
#define VFU_REGION_FLAG_WRITE (1 << 1)
-#define VFU_REGION_FLAG_MMAP (1 << 2) // TODO: how this relates to IO bar?
#define VFU_REGION_FLAG_RW (VFU_REGION_FLAG_READ | VFU_REGION_FLAG_WRITE)
-#define VFU_REGION_FLAG_MEM (1 << 3) // if unset, bar is IO
+#define VFU_REGION_FLAG_MEM (1 << 2) // if unset, bar is IO
/**
- * Set up a region.
+ * Set up a device region.
*
- * If this is the PCI configuration space, the @size argument is ignored. The
- * size of the region is determined by the PCI type (set when the libvfio-user
- * context is created). Accesses to the PCI configuration space header and the
- * PCI capabilities are handled internally; the user supplied callback is not
- * called.
+ * A region is an area of device memory that can be accessed by the client,
+ * either via VFIO_USER_REGION_READ/WRITE, or directly by mapping the region
+ * into the client's address space if an fd is given.
*
- * @vfu_ctx: the libvfio-user context
- * @region_idx: region index
- * @size: size of the region
- * @region_access: callback function to access region. If the region is memory
- * mappable and the client accesses the region or part of sparse area, then
- * the callback is not called.
- * @flags: region flags (VFU_REGION_FLAG_)
- * @mmap_areas: array of memory mappable areas. This array provides to the
- * server greater control of which specific areas should be memory mapped by
- * the client. Each element in the @mmap_areas array describes one such area.
- * Ignored if @nr_mmap_areas is 0 or if the region is not memory mappable.
- * @nr_mmap_areas: number of sparse areas in @mmap_areas. Must be 0 if the
- * region is not memory mappable.
- * @fd: file descriptor of the file backing the region if it's a mappable
- * region. It is the server's responsibility to create a file suitable for
- * memory mapping by the client. Ignored if the region is not memory mappable.
- *
- * A note on memory-mappable regions: the client can memory map any part of the
+ * A mappable region can be split into mappable sub-areas according to the
+ * @mmap_areas array. Note that the client can memory map any part of the
* file descriptor, even if not supposed to do so according to @mmap_areas.
* There is no way in Linux to avoid this.
*
@@ -241,6 +222,35 @@ typedef ssize_t (vfu_region_access_cb_t)(vfu_ctx_t *vfu_ctx, char *buf,
* descriptors of these DM targets. This is something we can document and
* demonstrate in a sample.
*
+ * Areas that are accessed via such a mapping by definition do not invoke any
+ * given callback. However, the callback can still be invoked, even on a
+ * mappable area, if the client chooses to call VFIO_USER_REGION_READ/WRITE.
+ *
+ * A VFU_PCI_DEV_CFG_REGION_IDX region, corresponding to PCI config space, has
+ * special handling:
+ *
+ * - the @size argument is ignored: the region size is always the size defined
+ * by the relevant PCI specification
+ * - all accesses to the standard PCI header (i.e. the first 64 bytes of the
+ * region) are handled by the library
+ * - all accesses to known PCI capabilities are handled by the library
+ * - if no callback is provided, reads to other areas are a simple memcpy(),
+ * and writes are an error
+ * - otherwise, the callback is expected to handle the access
+ *
+ * @vfu_ctx: the libvfio-user context
+ * @region_idx: region index
+ * @size: size of the region
+ * @region_access: callback function to access region
+ * @flags: region flags (VFU_REGION_FLAG_)
+ * @mmap_areas: array of memory mappable areas; must be provided if the region
+ * is mappable.
+ * @nr_mmap_areas: number of sparse areas in @mmap_areas; must be provided if
+ * the region is mappable.
+ * @fd: file descriptor of the file backing the region if the region is
+ * mappable; it is the server's responsibility to create a file suitable for
+ * memory mapping by the client.
+ *
* @returns 0 on success, -1 on error, Sets errno.
*/
int
@@ -490,20 +500,6 @@ void
vfu_unmap_sg(vfu_ctx_t *vfu_ctx, const dma_sg_t *sg,
struct iovec *iov, int cnt);
-//FIXME: Remove if we dont need this.
-/**
- * Returns the PCI region given the position and size of an address span in the
- * PCI configuration space.
- *
- * @pos: offset of the address span
- * @count: size of the address span
- * @off: output parameter that receives the relative offset within the region.
- *
- * Returns the PCI region (VFU_PCI_DEV_XXX_REGION_IDX), or -errno on error.
- */
-int
-vfu_get_region(loff_t pos, size_t count, loff_t *off);
-
/**
* Read from the dma region exposed by the client.
*
diff --git a/lib/cap.c b/lib/cap.c
index f427d1a..d42b5d4 100644
--- a/lib/cap.c
+++ b/lib/cap.c
@@ -83,9 +83,7 @@ cap_is_accessed(struct cap *caps, int nr_caps, size_t count, loff_t offset)
}
/*
- * FIXME write starts before capabilities but extends into them. I don't
- * think that the while loop in vfu_access will allow this in the first
- * place.
+ * FIXME write starts before capabilities but extends into them.
*/
assert(false);
} else if (offset > caps[nr_caps - 1].end) {
diff --git a/lib/libvfio-user.c b/lib/libvfio-user.c
index 4e9f58c..c0503ff 100644
--- a/lib/libvfio-user.c
+++ b/lib/libvfio-user.c
@@ -165,10 +165,11 @@ dev_get_caps(vfu_ctx_t *vfu_ctx, vfu_reg_info_t *vfu_reg, bool is_migr_reg,
return 0;
}
-#ifdef VFU_VERBOSE_LOGGING
-void
-dump_buffer(const char *prefix, const char *buf, uint32_t count)
+inline void
+dump_buffer(const char *prefix UNUSED, const char *buf UNUSED,
+ uint32_t count UNUSED)
{
+#ifdef VFU_VERBOSE_LOGGING
int i;
const size_t bytes_per_line = 0x8;
@@ -188,10 +189,8 @@ dump_buffer(const char *prefix, const char *buf, uint32_t count)
if (i % bytes_per_line != 0) {
fprintf(stderr, "\n");
}
-}
-#else
-#define dump_buffer(prefix, buf, count)
#endif
+}
static bool
is_migr_reg(vfu_ctx_t *vfu_ctx, int index)
@@ -199,238 +198,221 @@ is_migr_reg(vfu_ctx_t *vfu_ctx, int index)
return &vfu_ctx->reg_info[index] == vfu_ctx->migr_reg;
}
-long
-dev_get_reginfo(vfu_ctx_t *vfu_ctx, uint32_t index, uint32_t argsz,
- struct vfio_region_info **vfio_reg, int **fds, size_t *nr_fds)
+static ssize_t
+region_access(vfu_ctx_t *vfu_ctx, size_t region_index, char *buf,
+ size_t count, uint64_t offset, bool is_write)
{
- vfu_reg_info_t *vfu_reg;
- size_t caps_size;
+ ssize_t ret;
assert(vfu_ctx != NULL);
- assert(vfio_reg != NULL);
+ assert(buf != NULL);
- vfu_reg = &vfu_ctx->reg_info[index];
+ vfu_log(vfu_ctx, LOG_DEBUG, "%s %zu %#lx-%#lx", is_write ? "W" : "R",
+ region_index, offset, offset + count);
- if (index >= vfu_ctx->nr_regions) {
- vfu_log(vfu_ctx, LOG_DEBUG, "bad region index %d in get region info",
- index);
- return -EINVAL;
- }
-
- if (argsz < sizeof(struct vfio_region_info)) {
- vfu_log(vfu_ctx, LOG_DEBUG, "bad argsz %d", argsz);
- return -EINVAL;
+ if (is_write) {
+ dump_buffer("buffer write", buf, count);
}
- /*
- * TODO We assume that the client expects to receive argsz bytes.
- */
- *vfio_reg = calloc(1, argsz);
- if (!*vfio_reg) {
- return -ENOMEM;
- }
- caps_size = get_vfio_caps_size(is_migr_reg(vfu_ctx, index),
- vfu_reg->mmap_areas);
- (*vfio_reg)->argsz = caps_size + sizeof(struct vfio_region_info);
- (*vfio_reg)->flags = vfu_reg->flags;
- (*vfio_reg)->index = index;
- (*vfio_reg)->offset = region_to_offset((*vfio_reg)->index);
- (*vfio_reg)->size = vfu_reg->size;
+ if (region_index == VFU_PCI_DEV_CFG_REGION_IDX) {
+ ret = pci_config_space_access(vfu_ctx, buf, count, offset, is_write);
+ } else if (is_migr_reg(vfu_ctx, region_index)) {
+ ret = migration_region_access(vfu_ctx, buf, count, offset, is_write);
+ } else {
+ vfu_region_access_cb_t *cb = vfu_ctx->reg_info[region_index].cb;
- *nr_fds = 0;
- if (caps_size > 0) {
- if (vfu_reg->mmap_areas != NULL) {
- (*vfio_reg)->flags |= VFIO_REGION_INFO_FLAG_CAPS;
- }
- if (argsz >= (*vfio_reg)->argsz) {
- dev_get_caps(vfu_ctx, vfu_reg, is_migr_reg(vfu_ctx, index),
- *vfio_reg, fds, nr_fds);
+ if (cb == NULL) {
+ vfu_log(vfu_ctx, LOG_ERR, "no callback for region %d",
+ region_index);
+ return -EINVAL;
}
+
+ ret = cb(vfu_ctx, buf, count, offset, is_write);
}
- vfu_log(vfu_ctx, LOG_DEBUG, "region_info[%d] offset %#llx flags %#x size %llu "
- "argsz %u",
- (*vfio_reg)->index, (*vfio_reg)->offset, (*vfio_reg)->flags,
- (*vfio_reg)->size, (*vfio_reg)->argsz);
+ if (!is_write && (size_t)ret == count) {
+ dump_buffer("buffer read", buf, count);
+ }
- return 0;
+ return ret;
}
-int
-vfu_get_region(loff_t pos, size_t count, loff_t *off)
+static bool
+is_valid_region_access(vfu_ctx_t *vfu_ctx, size_t size, uint16_t cmd,
+ struct vfio_user_region_access *ra)
{
- int r;
+ size_t index;
+
+ assert(vfu_ctx != NULL);
+ assert(ra != NULL);
- assert(off != NULL);
+ if (size < sizeof (*ra)) {
+ vfu_log(vfu_ctx, LOG_ERR, "message size too small (%d)", size);
+ return false;
+ }
- r = offset_to_region(pos);
- if ((int)offset_to_region(pos + count) != r) {
- return -ENOENT;
+ if (cmd == VFIO_USER_REGION_WRITE && size - sizeof (*ra) != ra->count) {
+ vfu_log(vfu_ctx, LOG_ERR, "region write count too small: "
+ "expected %lu, got %u", size - sizeof (*ra), ra->count);
+ return false;
}
- *off = pos - region_to_offset(r);
- return r;
+ index = ra->region;
+
+ if (index >= vfu_ctx->nr_regions) {
+ vfu_log(vfu_ctx, LOG_ERR, "bad region index %u", index);
+ return false;
+ }
+
+ // FIXME: need to audit later for wraparound
+ if (ra->offset + ra->count > vfu_ctx->reg_info[index].size) {
+ vfu_log(vfu_ctx, LOG_ERR, "out of bounds region access %#lx-%#lx "
+ "(size %#lx)", ra->offset, ra->offset + ra->count,
+ vfu_ctx->reg_info[index].size);
+
+ return false;
+ }
+
+ if (device_is_stopped_and_copying(vfu_ctx->migration) &&
+ !is_migr_reg(vfu_ctx, index)) {
+ vfu_log(vfu_ctx, LOG_ERR,
+ "cannot access region %u while device in stop-and-copy state",
+ index);
+ return false;
+ }
+
+ return true;
}
-static ssize_t
-do_access(vfu_ctx_t *vfu_ctx, char *buf, uint8_t count, uint64_t pos, bool is_write)
+static int
+handle_region_access(vfu_ctx_t *vfu_ctx, uint32_t size, uint16_t cmd,
+ void **data, size_t *len,
+ struct vfio_user_region_access *ra)
{
- int idx;
- loff_t offset;
+ ssize_t ret;
+ char *buf;
assert(vfu_ctx != NULL);
- assert(buf != NULL);
- assert(count == 1 || count == 2 || count == 4 || count == 8);
-
- idx = vfu_get_region(pos, count, &offset);
- if (idx < 0) {
- vfu_log(vfu_ctx, LOG_ERR, "invalid region %d", idx);
- return idx;
- }
+ assert(data != NULL);
+ assert(ra != NULL);
- if (idx < 0 || idx >= (int)vfu_ctx->nr_regions) {
- vfu_log(vfu_ctx, LOG_ERR, "bad region %d", idx);
+ if (!is_valid_region_access(vfu_ctx, size, cmd, ra)) {
return -EINVAL;
}
- if (idx == VFU_PCI_DEV_CFG_REGION_IDX) {
- return pci_config_space_access(vfu_ctx, buf, count, offset, is_write);
+ if (ra->count == 0) {
+ return 0;
}
- if (is_migr_reg(vfu_ctx, idx)) {
- if (offset + count > vfu_ctx->reg_info[idx].size) {
- vfu_log(vfu_ctx, LOG_ERR, "read %#lx-%#lx past end of migration region (%#x)",
- offset, offset + count - 1,
- vfu_ctx->reg_info[idx].size);
- return -EINVAL;
- }
- return handle_migration_region_access(vfu_ctx, vfu_ctx->migration,
- buf, count, offset, is_write);
+ *len = sizeof (*ra);
+ if (cmd == VFIO_USER_REGION_READ) {
+ *len += ra->count;
+ }
+ *data = calloc(1, *len);
+ if (*data == NULL) {
+ return -ENOMEM;
+ }
+ if (cmd == VFIO_USER_REGION_READ) {
+ buf = (char *)(((struct vfio_user_region_access*)(*data)) + 1);
+ } else {
+ buf = (char *)(ra + 1);
}
- /*
- * Checking whether a callback exists might sound expensive however this
- * code is not performance critical. This works well when we don't expect a
- * region to be used, so the user of the library can simply leave the
- * callback NULL in vfu_create_ctx.
- */
- if (vfu_ctx->reg_info[idx].fn != NULL) {
- return vfu_ctx->reg_info[idx].fn(vfu_ctx, buf, count, offset, is_write);
+ ret = region_access(vfu_ctx, ra->region, buf, ra->count, ra->offset,
+ cmd == VFIO_USER_REGION_WRITE);
+
+ if (ret != ra->count) {
+ vfu_log(vfu_ctx, LOG_ERR, "failed to %s %#x-%#lx: %d",
+ cmd == VFIO_USER_REGION_WRITE ? "write" : "read",
+ ra->count, ra->offset + ra->count - 1, ret);
+ /* FIXME we should return whatever has been accessed, not an error */
+ if (ret >= 0) {
+ ret = -EINVAL;
+ }
+ return ret;
}
- vfu_log(vfu_ctx, LOG_ERR, "no callback for region %d", idx);
+ ra = *data;
+ ra->count = ret;
- return -EINVAL;
+ return 0;
}
-/*
- * Returns the number of bytes processed on success or a negative number on
- * error.
- *
- * TODO function naming, general cleanup of access path
- * FIXME we must be able to return values up to uint32_t bit, or negative on
- * error. Better to make return value an int and return the number of bytes
- * processed via an argument.
- */
-static ssize_t
-_vfu_access(vfu_ctx_t *vfu_ctx, char *buf, uint32_t count, uint64_t *ppos,
- bool is_write)
-{
- uint32_t done = 0;
- int ret;
-
- assert(vfu_ctx != NULL);
- /* buf and ppos can be NULL if count is 0 */
+#define VFU_REGION_SHIFT 40
- while (count) {
- size_t size;
- /*
- * Limit accesses to qword and enforce alignment. Figure out whether
- * the PCI spec requires this
- * FIXME while this makes sense for registers, we might be able to relax
- * this requirement and make some transfers more efficient. Maybe make
- * this a per-region option that can be set by the user?
- */
- if (count >= 8 && !(*ppos % 8)) {
- size = 8;
- } else if (count >= 4 && !(*ppos % 4)) {
- size = 4;
- } else if (count >= 2 && !(*ppos % 2)) {
- size = 2;
- } else {
- size = 1;
- }
- ret = do_access(vfu_ctx, buf, size, *ppos, is_write);
- if (ret <= 0) {
- vfu_log(vfu_ctx, LOG_ERR, "failed to %s %#lx-%#lx: %s",
- is_write ? "write to" : "read from", *ppos, *ppos + size - 1,
- strerror(-ret));
- /*
- * TODO if ret < 0 then it might contain a legitimate error code, why replace it with EFAULT?
- */
- return -EFAULT;
- }
- if (ret != (int)size) {
- vfu_log(vfu_ctx, LOG_DEBUG, "bad read %d != %ld", ret, size);
- }
- count -= size;
- done += size;
- *ppos += size;
- buf += size;
- }
- return done;
+static inline uint64_t
+region_to_offset(uint32_t region)
+{
+ return (uint64_t)region << VFU_REGION_SHIFT;
}
-static inline int
-vfu_access(vfu_ctx_t *vfu_ctx, bool is_write, char *rwbuf, uint32_t count,
- uint64_t *pos)
+long
+dev_get_reginfo(vfu_ctx_t *vfu_ctx, uint32_t index, uint32_t argsz,
+ struct vfio_region_info **vfio_reg, int **fds, size_t *nr_fds)
{
- uint32_t processed = 0, _count;
- int ret;
+ vfu_reg_info_t *vfu_reg;
+ size_t caps_size;
assert(vfu_ctx != NULL);
- assert(rwbuf != NULL);
- assert(pos != NULL);
+ assert(vfio_reg != NULL);
- vfu_log(vfu_ctx, LOG_DEBUG, "%s %#lx-%#lx", is_write ? "W" : "R", *pos,
- *pos + count - 1);
+ vfu_reg = &vfu_ctx->reg_info[index];
-#ifdef VFU_VERBOSE_LOGGING
- if (is_write) {
- dump_buffer("buffer write", rwbuf, count);
+ if (index >= vfu_ctx->nr_regions) {
+ vfu_log(vfu_ctx, LOG_DEBUG, "bad region index %d in get region info",
+ index);
+ return -EINVAL;
}
-#endif
- _count = count;
-
- if (pci_is_hdr_access(*pos)) {
- ret = pci_hdr_access(vfu_ctx, &_count, pos, is_write, rwbuf);
- if (ret != 0) {
- /* FIXME shouldn't we fail here? */
- vfu_log(vfu_ctx, LOG_ERR, "failed to access PCI header: %s",
- strerror(-ret));
-#ifdef VFU_VERBOSE_LOGGING
- dump_buffer("buffer write", rwbuf, _count);
-#endif
- }
+ if (argsz < sizeof(struct vfio_region_info)) {
+ vfu_log(vfu_ctx, LOG_DEBUG, "bad argsz %d", argsz);
+ return -EINVAL;
}
/*
- * count is how much has been processed by pci_hdr_access,
- * _count is how much there's left to be processed by vfu_access
+ * TODO We assume that the client expects to receive argsz bytes.
*/
- processed = count - _count;
- ret = _vfu_access(vfu_ctx, rwbuf + processed, _count, pos, is_write);
- if (ret >= 0) {
- ret += processed;
-#ifdef VFU_VERBOSE_LOGGING
- if (!is_write && err == ret) {
- dump_buffer("buffer read", rwbuf, ret);
+ *vfio_reg = calloc(1, argsz);
+ if (!*vfio_reg) {
+ return -ENOMEM;
+ }
+ caps_size = get_vfio_caps_size(is_migr_reg(vfu_ctx, index),
+ vfu_reg->mmap_areas);
+ (*vfio_reg)->argsz = caps_size + sizeof(struct vfio_region_info);
+ (*vfio_reg)->index = index;
+ (*vfio_reg)->offset = region_to_offset((*vfio_reg)->index);
+ (*vfio_reg)->size = vfu_reg->size;
+
+ (*vfio_reg)->flags = 0;
+
+ if (vfu_reg->flags & VFU_REGION_FLAG_READ) {
+ (*vfio_reg)->flags |= VFIO_REGION_INFO_FLAG_READ;
+ }
+ if (vfu_reg->flags & VFU_REGION_FLAG_WRITE) {
+ (*vfio_reg)->flags |= VFIO_REGION_INFO_FLAG_WRITE;
+ }
+
+ if (vfu_reg->fd != -1) {
+ (*vfio_reg)->flags |= VFIO_REGION_INFO_FLAG_MMAP;
+ }
+
+ *nr_fds = 0;
+ if (caps_size > 0) {
+ if (vfu_reg->mmap_areas != NULL) {
+ (*vfio_reg)->flags |= VFIO_REGION_INFO_FLAG_CAPS;
+ }
+ if (argsz >= (*vfio_reg)->argsz) {
+ dev_get_caps(vfu_ctx, vfu_reg, is_migr_reg(vfu_ctx, index),
+ *vfio_reg, fds, nr_fds);
}
-#endif
}
- return ret;
+ vfu_log(vfu_ctx, LOG_DEBUG, "region_info[%d] offset %#llx flags %#x size %llu "
+ "argsz %u",
+ (*vfio_reg)->index, (*vfio_reg)->offset, (*vfio_reg)->flags,
+ (*vfio_reg)->size, (*vfio_reg)->argsz);
+
+ return 0;
}
/* TODO merge with dev_get_reginfo */
@@ -595,96 +577,6 @@ handle_device_reset(vfu_ctx_t *vfu_ctx)
}
static int
-validate_region_access(vfu_ctx_t *vfu_ctx, uint32_t size, uint16_t cmd,
- struct vfio_user_region_access *region_access)
-{
- assert(region_access != NULL);
-
- if (size < sizeof *region_access) {
- vfu_log(vfu_ctx, LOG_ERR, "message size too small (%d)", size);
- return -EINVAL;
- }
-
- if (region_access->region > vfu_ctx->nr_regions || region_access->count <= 0) {
- vfu_log(vfu_ctx, LOG_ERR, "bad region %d and/or count %d",
- region_access->region, region_access->count);
- return -EINVAL;
- }
-
- if (device_is_stopped_and_copying(vfu_ctx->migration) &&
- !is_migr_reg(vfu_ctx, region_access->region)) {
- vfu_log(vfu_ctx, LOG_ERR,
- "cannot access region %d while device in stop-and-copy state",
- region_access->region);
- return -EINVAL;
- }
-
- if (cmd == VFIO_USER_REGION_WRITE &&
- size - sizeof *region_access != region_access->count)
- {
- vfu_log(vfu_ctx, LOG_ERR, "bad region access, expected %lu, actual %d",
- size - sizeof *region_access, region_access->count);
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int
-handle_region_access(vfu_ctx_t *vfu_ctx, uint32_t size, uint16_t cmd,
- void **data, size_t *len,
- struct vfio_user_region_access *region_access)
-{
- uint64_t count, offset;
- int ret;
- char *buf;
-
- assert(vfu_ctx != NULL);
- assert(data != NULL);
- assert(region_access != NULL);
-
- ret = validate_region_access(vfu_ctx, size, cmd, region_access);
- if (ret < 0) {
- return ret;
- }
-
- *len = sizeof *region_access;
- if (cmd == VFIO_USER_REGION_READ) {
- *len += region_access->count;
- }
- *data = calloc(1, *len);
- if (*data == NULL) {
- return -ENOMEM;
- }
- if (cmd == VFIO_USER_REGION_READ) {
- buf = (char*)(((struct vfio_user_region_access*)(*data)) + 1);
- } else {
- buf = (char*)(region_access + 1);
- }
-
- count = region_access->count;
- offset = region_to_offset(region_access->region) + region_access->offset;
-
- ret = vfu_access(vfu_ctx, cmd == VFIO_USER_REGION_WRITE, buf, count, &offset);
- if (ret != (int)region_access->count) {
- vfu_log(vfu_ctx, LOG_ERR, "failed to %s %#x-%#lx: %d",
- cmd == VFIO_USER_REGION_WRITE ? "write" : "read",
- region_access->count,
- region_access->offset + region_access->count - 1, ret);
- /* FIXME we should return whatever has been accessed, not an error */
- if (ret >= 0) {
- ret = -EINVAL;
- }
- return ret;
- }
-
- region_access = *data;
- region_access->count = ret;
-
- return 0;
-}
-
-static int
handle_dirty_pages_get(vfu_ctx_t *vfu_ctx,
struct iovec **iovecs, size_t *nr_iovecs,
struct vfio_iommu_type1_dirty_bitmap_get *ranges,
@@ -1341,16 +1233,12 @@ copy_sparse_mmap_areas(vfu_reg_info_t *reg_info,
static int
setup_sparse_areas(vfu_reg_info_t *r, struct iovec *mmap_areas,
- uint32_t nr_mmap_areas, int fd)
+ uint32_t nr_mmap_areas)
{
int ret, i;
assert(r != NULL);
- if (fd == -1) {
- return -EBADF;
- }
- r->fd = fd;
ret = copy_sparse_mmap_areas(r, mmap_areas, nr_mmap_areas);
if (ret < 0) {
return ret;
@@ -1367,39 +1255,48 @@ setup_sparse_areas(vfu_reg_info_t *r, struct iovec *mmap_areas,
int
vfu_setup_region(vfu_ctx_t *vfu_ctx, int region_idx, size_t size,
- vfu_region_access_cb_t *region_access, int flags,
- struct iovec *mmap_areas, uint32_t nr_mmap_areas,
- int fd)
+ vfu_region_access_cb_t *cb, int flags,
+ struct iovec *mmap_areas, uint32_t nr_mmap_areas, int fd)
{
+ vfu_reg_info_t *reg;
int ret;
assert(vfu_ctx != NULL);
- switch(region_idx) {
- case VFU_PCI_DEV_BAR0_REGION_IDX ... VFU_PCI_DEV_VGA_REGION_IDX:
- // Validate the config region provided.
- if (region_idx == VFU_PCI_DEV_CFG_REGION_IDX &&
- flags != VFU_REGION_FLAG_RW) {
- return ERROR(EINVAL);
- }
+ if ((mmap_areas == NULL) != (nr_mmap_areas == 0) ||
+ (mmap_areas == NULL) != (fd == -1)) {
+ vfu_log(vfu_ctx, LOG_ERR, "invalid mappable region arguments");
+ return ERROR(EINVAL);
+ }
- vfu_ctx->reg_info[region_idx].flags = flags;
- vfu_ctx->reg_info[region_idx].size = size;
- vfu_ctx->reg_info[region_idx].fn = region_access;
+ if (region_idx < VFU_PCI_DEV_BAR0_REGION_IDX ||
+ region_idx > VFU_PCI_DEV_VGA_REGION_IDX) {
+ vfu_log(vfu_ctx, LOG_ERR, "invalid region index %d", region_idx);
+ return ERROR(EINVAL);
+ }
- if (nr_mmap_areas > 0) {
- ret = setup_sparse_areas(&vfu_ctx->reg_info[region_idx], mmap_areas,
- nr_mmap_areas, fd);
- if (ret < 0) {
- return ERROR(-ret);
- }
- }
- break;
- default:
- vfu_log(vfu_ctx, LOG_ERR, "Invalid region index %d", region_idx);
+ /*
+ * PCI config space is never mappable or of type mem.
+ */
+ if (region_idx == VFU_PCI_DEV_CFG_REGION_IDX &&
+ flags != VFU_REGION_FLAG_RW) {
return ERROR(EINVAL);
}
+ reg = &vfu_ctx->reg_info[region_idx];
+
+ reg->flags = flags;
+ reg->size = size;
+ reg->cb = cb;
+ reg->fd = fd;
+
+ if (nr_mmap_areas > 0) {
+ ret = setup_sparse_areas(reg, mmap_areas, nr_mmap_areas);
+ if (ret < 0) {
+ return ERROR(-ret);
+ }
+ }
+
return 0;
}
diff --git a/lib/migration.c b/lib/migration.c
index 29c667b..91fd09f 100644
--- a/lib/migration.c
+++ b/lib/migration.c
@@ -36,6 +36,7 @@
#include "common.h"
#include "migration.h"
+#include "private.h"
enum migr_iter_state {
VFIO_USER_MIGR_ITER_STATE_INITIAL,
@@ -380,10 +381,10 @@ handle_data_size(vfu_ctx_t *vfu_ctx, struct migration *migr,
}
static ssize_t
-handle_region_access_registers(vfu_ctx_t *vfu_ctx, struct migration *migr,
- char *buf, size_t count,
- loff_t pos, bool is_write)
+migration_region_access_registers(vfu_ctx_t *vfu_ctx, char *buf, size_t count,
+ loff_t pos, bool is_write)
{
+ struct migration *migr = vfu_ctx->migration;
int ret;
assert(migr != NULL);
@@ -430,18 +431,18 @@ handle_region_access_registers(vfu_ctx_t *vfu_ctx, struct migration *migr,
}
ssize_t
-handle_migration_region_access(vfu_ctx_t *vfu_ctx, struct migration *migr,
- char *buf, size_t count,
- loff_t pos, bool is_write)
+migration_region_access(vfu_ctx_t *vfu_ctx, char *buf, size_t count,
+ loff_t pos, bool is_write)
{
- int ret = -EINVAL;
+ struct migration *migr = vfu_ctx->migration;
+ ssize_t ret = -EINVAL;
assert(migr != NULL);
assert(buf != NULL);
if (pos + count <= sizeof(struct vfio_device_migration_info)) {
- ret = handle_region_access_registers(vfu_ctx, migr, buf,
- count, pos, is_write);
+ ret = migration_region_access_registers(vfu_ctx, buf, count,
+ pos, is_write);
} else {
pos -= sizeof(struct vfio_device_migration_info);
if (is_write) {
diff --git a/lib/migration.h b/lib/migration.h
index 6978828..f126c13 100644
--- a/lib/migration.h
+++ b/lib/migration.h
@@ -47,9 +47,8 @@ struct migration *
init_migration(const vfu_migration_t * const vfu_migr, int *err);
ssize_t
-handle_migration_region_access(vfu_ctx_t *vfu_ctx, struct migration *migr,
- char *buf, size_t count,
- loff_t pos, bool is_write);
+migration_region_access(vfu_ctx_t *vfu_ctx, char *buf, size_t count,
+ loff_t pos, bool is_write);
bool
migration_available(vfu_ctx_t *vfu_ctx);
diff --git a/lib/pci.c b/lib/pci.c
index 01b8a27..41a6f9a 100644
--- a/lib/pci.c
+++ b/lib/pci.c
@@ -216,21 +216,18 @@ handle_erom_write(vfu_ctx_t *ctx, vfu_pci_config_space_t *pci,
return 0;
}
-static inline int
-pci_hdr_write(vfu_ctx_t *vfu_ctx, uint16_t offset,
- const char *buf, size_t count)
+static int
+pci_hdr_write(vfu_ctx_t *vfu_ctx, vfu_pci_config_space_t *cfg_space,
+ const char *buf, size_t count, loff_t offset)
{
- vfu_pci_config_space_t *pci;
int ret = 0;
assert(vfu_ctx != NULL);
assert(buf != NULL);
- pci = vfu_pci_get_config_space(vfu_ctx);
-
switch (offset) {
case PCI_COMMAND:
- ret = handle_command_write(vfu_ctx, pci, buf, count);
+ ret = handle_command_write(vfu_ctx, cfg_space, buf, count);
break;
case PCI_STATUS:
vfu_log(vfu_ctx, LOG_INFO, "write to status ignored\n");
@@ -240,12 +237,13 @@ pci_hdr_write(vfu_ctx_t *vfu_ctx, uint16_t offset,
ret = -EINVAL;
break;
case PCI_INTERRUPT_LINE:
- pci->hdr.intr.iline = buf[0];
- vfu_log(vfu_ctx, LOG_DEBUG, "ILINE=%0x\n", pci->hdr.intr.iline);
+ cfg_space->hdr.intr.iline = buf[0];
+ vfu_log(vfu_ctx, LOG_DEBUG, "ILINE=%0x\n", cfg_space->hdr.intr.iline);
break;
case PCI_LATENCY_TIMER:
- pci->hdr.mlt = (uint8_t)buf[0];
- vfu_log(vfu_ctx, LOG_INFO, "set to latency timer to %hhx\n", pci->hdr.mlt);
+ cfg_space->hdr.mlt = (uint8_t)buf[0];
+ vfu_log(vfu_ctx, LOG_INFO, "set to latency timer to %hhx\n",
+ cfg_space->hdr.mlt);
break;
case PCI_BASE_ADDRESS_0:
case PCI_BASE_ADDRESS_1:
@@ -256,7 +254,7 @@ pci_hdr_write(vfu_ctx_t *vfu_ctx, uint16_t offset,
pci_hdr_write_bar(vfu_ctx, BAR_INDEX(offset), buf);
break;
case PCI_ROM_ADDRESS:
- ret = handle_erom_write(vfu_ctx, pci, buf, count);
+ ret = handle_erom_write(vfu_ctx, cfg_space, buf, count);
break;
default:
vfu_log(vfu_ctx, LOG_INFO, "PCI config write %#x-%#lx not handled\n",
@@ -265,84 +263,114 @@ pci_hdr_write(vfu_ctx_t *vfu_ctx, uint16_t offset,
}
#ifdef VFU_VERBOSE_LOGGING
- dump_buffer("PCI header", (char*)pci->hdr.raw, 0xff);
+ dump_buffer("PCI header", (const char *)cfg_space->hdr.raw, 0xff);
#endif
return ret;
}
-/*
- * @pci_hdr: the PCI header
- * @reg_info: region info
- * @rw: the command
- * @write: whether this is a PCI header write
- * @count: output parameter that receives the number of bytes read/written
- */
-int
-pci_hdr_access(vfu_ctx_t *vfu_ctx, uint32_t *count,
- uint64_t *pos, bool is_write, char *buf)
+static int
+pci_hdr_access(vfu_ctx_t *vfu_ctx, char *buf, size_t *countp,
+ loff_t *offsetp, bool is_write)
{
- uint32_t _count;
- loff_t _pos;
+ vfu_pci_config_space_t *cfg_space = vfu_pci_get_config_space(vfu_ctx);
+ size_t count;
int err = 0;
- assert(vfu_ctx != NULL);
- assert(count != NULL);
- assert(pos != NULL);
- assert(buf != NULL);
+ assert(countp != NULL);
+ assert(offsetp != NULL);
- _pos = *pos - region_to_offset(VFU_PCI_DEV_CFG_REGION_IDX);
- _count = MIN(*count, PCI_STD_HEADER_SIZEOF - _pos);
+ count = MIN(*countp, (size_t)(PCI_STD_HEADER_SIZEOF - *offsetp));
if (is_write) {
- err = pci_hdr_write(vfu_ctx, _pos, buf, _count);
+ err = pci_hdr_write(vfu_ctx, cfg_space, buf, count, *offsetp);
} else {
- memcpy(buf, vfu_pci_get_config_space(vfu_ctx)->hdr.raw + _pos, _count);
+ memcpy(buf, cfg_space->hdr.raw + *offsetp, count);
}
- *pos += _count;
- *count -= _count;
- return err;
-}
-static uint32_t
-pci_config_space_size(vfu_ctx_t *vfu_ctx)
-{
- return vfu_ctx->reg_info[VFU_PCI_DEV_CFG_REGION_IDX].size;
+ *countp -= count;
+ *offsetp += count;
+
+ return err;
}
/*
- * Accesses the non-standard part (offset >= 0x40) of the PCI configuration
- * space.
+ * Special handler for config space: we handle all accesses to the standard PCI
+ * header, as well as to any capabilities.
+ *
+ * Outside of those areas, if a callback is specified for the region, we'll use
+ * that; otherwise, writes are not allowed, and reads are satisfied with
+ * memcpy().
+ *
+ * Returns the number of bytes handled, or -errno on error.
*/
ssize_t
pci_config_space_access(vfu_ctx_t *vfu_ctx, char *buf, size_t count,
- loff_t pos, bool is_write)
+ loff_t offset, bool is_write)
{
- int ret;
+ vfu_region_access_cb_t *cb;
+ loff_t start = offset;
+ ssize_t ret;
+ int rc;
+
+ assert(vfu_ctx != NULL);
+
+ cb = vfu_ctx->reg_info[VFU_PCI_DEV_CFG_REGION_IDX].cb;
+
+ if (offset < PCI_STD_HEADER_SIZEOF) {
+ rc = pci_hdr_access(vfu_ctx, buf, &count, &offset, is_write);
+ if (rc < 0) {
+ vfu_log(vfu_ctx, LOG_ERR, "failed to access PCI header: %s",
+ strerror(-rc));
+ dump_buffer("buffer write", buf, count);
+ return rc;
+ }
+ }
+
+ if (count == 0) {
+ return offset - start;
+ }
- count = MIN(pci_config_space_size(vfu_ctx), count);
if (is_write) {
- ret = cap_maybe_access(vfu_ctx, vfu_ctx->pci.caps, buf, count, pos);
- if (ret < 0) {
+ rc = cap_maybe_access(vfu_ctx, vfu_ctx->pci.caps, buf, count, offset);
+
+ if (rc < 0) {
vfu_log(vfu_ctx, LOG_ERR, "bad access to capabilities %#lx-%#lx\n",
- pos, pos + count);
- return ret;
+ offset, offset + count);
+ return rc;
+ } else if (rc > 0) {
+ offset += count;
+ return offset - start;
}
+
+ if (cb == NULL) {
+ vfu_log(vfu_ctx, LOG_ERR, "config space write: no callback");
+ return -EINVAL;
+ }
+
} else {
- /*
- * It is legitimate to read the non-standard part of the PCI config
- * space even if there are no capabilities.
- */
- memcpy(buf, vfu_ctx->pci.config_space->raw + pos, count);
+ if (cb == NULL) {
+ memcpy(buf, vfu_ctx->pci.config_space->raw + offset, count);
+ offset += count;
+ return offset - start;
+ }
}
- return count;
+
+ ret = cb(vfu_ctx, buf, count, offset, is_write);
+ offset += ret;
+
+ if (ret < 0) {
+ return ret;
+ }
+
+ return offset - start;
}
int
vfu_pci_init(vfu_ctx_t *vfu_ctx, vfu_pci_type_t pci_type,
int hdr_type, int revision UNUSED)
{
- vfu_pci_config_space_t *config_space;
+ vfu_pci_config_space_t *cfg_space;
size_t size;
assert(vfu_ctx != NULL);
@@ -377,13 +405,13 @@ vfu_pci_init(vfu_ctx_t *vfu_ctx, vfu_pci_type_t pci_type,
}
// Allocate a buffer for the config space.
- config_space = calloc(1, size);
- if (config_space == NULL) {
+ cfg_space = calloc(1, size);
+ if (cfg_space == NULL) {
return ERROR(ENOMEM);
}
vfu_ctx->pci.type = pci_type;
- vfu_ctx->pci.config_space = config_space;
+ vfu_ctx->pci.config_space = cfg_space;
vfu_ctx->reg_info[VFU_PCI_DEV_CFG_REGION_IDX].size = size;
return 0;
diff --git a/lib/pci.h b/lib/pci.h
index 8678902..b38b12b 100644
--- a/lib/pci.h
+++ b/lib/pci.h
@@ -36,17 +36,6 @@
#include "libvfio-user.h"
#include "private.h"
-static inline bool
-pci_is_hdr_access(uint64_t pos)
-{
- const uint64_t off = region_to_offset(VFU_PCI_DEV_CFG_REGION_IDX);
- return pos >= off && pos - off < PCI_STD_HEADER_SIZEOF;
-}
-
-int
-pci_hdr_access(vfu_ctx_t *vfu_ctx, uint32_t *count,
- uint64_t *pos, bool is_write, char *buf);
-
ssize_t
pci_config_space_access(vfu_ctx_t *vfu_ctx, char *buf, size_t count,
loff_t pos, bool is_write);
diff --git a/lib/private.h b/lib/private.h
index 871ba29..a3b90f5 100644
--- a/lib/private.h
+++ b/lib/private.h
@@ -35,9 +35,6 @@
#include "dma.h"
-#define VFU_REGION_SHIFT 40
-#define VFU_REGION_MASK ((1ULL << VFU_REGION_SHIFT) - 1)
-
static inline int
ERROR(int err)
{
@@ -45,13 +42,6 @@ ERROR(int err)
return -1;
}
-#ifdef VFU_VERBOSE_LOGGING
-void
-dump_buffer(const char *prefix, const char *buf, uint32_t count);
-#else
-#define dump_buffer(prefix, buf, count)
-#endif
-
struct transport_ops {
int (*init)(vfu_ctx_t*);
int (*attach)(vfu_ctx_t*);
@@ -99,7 +89,7 @@ typedef struct {
* Note that the memory of the region is owned by the user, except for the
* standard header (first 64 bytes) of the PCI configuration space.
*/
- vfu_region_access_cb_t *fn;
+ vfu_region_access_cb_t *cb;
struct vfu_sparse_mmap_areas *mmap_areas; /* sparse mmap areas */
int fd;
@@ -139,25 +129,12 @@ struct vfu_ctx {
vfu_dev_type_t dev_type;
};
-int
-vfu_pci_hdr_access(vfu_ctx_t *vfu_ctx, uint32_t *count,
- uint64_t *pos, bool write, char *buf);
+void
+dump_buffer(const char *prefix, const char *buf, uint32_t count);
vfu_reg_info_t *
vfu_get_region_info(vfu_ctx_t *vfu_ctx);
-static inline uint64_t
-region_to_offset(uint32_t region)
-{
- return (uint64_t)region << VFU_REGION_SHIFT;
-}
-
-static inline uint32_t
-offset_to_region(uint64_t offset)
-{
- return (offset >> VFU_REGION_SHIFT) & VFU_REGION_MASK;
-}
-
int
handle_dma_map_or_unmap(vfu_ctx_t *vfu_ctx, uint32_t size, bool map,
int *fds, size_t nr_fds,
diff --git a/test/unit-tests.c b/test/unit-tests.c
index 6fb89a4..30cc71c 100644
--- a/test/unit-tests.c
+++ b/test/unit-tests.c
@@ -471,24 +471,25 @@ test_get_region_info(UNUSED void **state)
dev_get_reginfo(&vfu_ctx, index, argsz, &vfio_reg,
&fds, &nr_fds));
assert_int_equal(sizeof(struct vfio_region_info), vfio_reg->argsz);
- assert_int_equal(VFU_REGION_FLAG_RW, vfio_reg->flags);
+ assert_int_equal(VFIO_REGION_INFO_FLAG_READ | VFIO_REGION_INFO_FLAG_WRITE |
+ VFIO_REGION_INFO_FLAG_MMAP, vfio_reg->flags);
assert_int_equal(1, vfio_reg->index);
- assert_int_equal(0x10000000000, region_to_offset(vfio_reg->index));
+ assert_int_equal(0x10000000000, vfio_reg->offset);
assert_int_equal(0xdeadbeef, vfio_reg->size);
assert_int_equal(0, nr_fds);
/* regions caps (sparse mmap) but argsz too small */
- mmap_areas->nr_mmap_areas = 1;
+ mmap_areas->nr_mmap_areas = 1;
mmap_areas->areas[0].iov_base = (void*)0x8badf00d;
mmap_areas->areas[0].iov_len = 0x0d15ea5e;
vfu_ctx.reg_info[1].mmap_areas = mmap_areas;
- vfu_ctx.reg_info[1].flags |= VFIO_REGION_INFO_FLAG_MMAP;
assert_int_equal(0,
dev_get_reginfo(&vfu_ctx, index, argsz, &vfio_reg,
&fds, &nr_fds));
assert_int_equal(argsz + sizeof(struct vfio_region_info_cap_sparse_mmap) + sizeof(struct vfio_region_sparse_mmap_area),
vfio_reg->argsz);
- assert_int_equal(VFU_REGION_FLAG_RW | VFIO_REGION_INFO_FLAG_MMAP | VFIO_REGION_INFO_FLAG_CAPS,
+ assert_int_equal(VFIO_REGION_INFO_FLAG_READ | VFIO_REGION_INFO_FLAG_WRITE |
+ VFIO_REGION_INFO_FLAG_MMAP | VFIO_REGION_INFO_FLAG_CAPS,
vfio_reg->flags);
assert_int_equal(0, nr_fds);
@@ -668,25 +669,36 @@ test_setup_sparse_region(void **state __attribute__((unused)))
.iov_len = 0x1000
}
};
+ int ret;
- /* bad fd */
- assert_int_equal(-1,
- vfu_setup_region(&vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX,
- 0x2000, NULL, 0, mmap_areas, 2, -1));
- assert_int_equal(EBADF, errno);
+ /* invalid mappable settings */
+ ret = vfu_setup_region(&vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX,
+ 0x2000, NULL, 0, mmap_areas, 2, -1);
+ assert_int_equal(-1, ret);
+ assert_int_equal(EINVAL, errno);
+
+ ret = vfu_setup_region(&vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX,
+ 0x2000, NULL, 0, NULL, 0, 1);
+ assert_int_equal(-1, ret);
+ assert_int_equal(EINVAL, errno);
+
+ ret = vfu_setup_region(&vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX,
+ 0x2000, NULL, 0, mmap_areas, 0, 1);
+ assert_int_equal(-1, ret);
+ assert_int_equal(EINVAL, errno);
/* sparse region exceeds region size */
mmap_areas[1].iov_len = 0x1001;
- assert_int_equal(-1,
- vfu_setup_region(&vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX,
- 0x2000, NULL, 0, mmap_areas, 2, 0));
+ ret = vfu_setup_region(&vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX,
+ 0x2000, NULL, 0, mmap_areas, 2, 0);
+ assert_int_equal(-1, ret);
assert_int_equal(EINVAL, errno);
/* sparse region within region size */
mmap_areas[1].iov_len = 0x1000;
- assert_int_equal(0,
- vfu_setup_region(&vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX,
- 0x2000, NULL, 0, mmap_areas, 2, 0));
+ ret = vfu_setup_region(&vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX,
+ 0x2000, NULL, 0, mmap_areas, 2, 0);
+ assert_int_equal(0, ret);
}
static void