diff options
author | John Levon <john.levon@nutanix.com> | 2021-01-07 19:55:44 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-01-07 19:55:44 +0000 |
commit | 6ec31642f6253f5c19187c1ffb396d5921138b67 (patch) | |
tree | 9dd88104b0d56e63a83a48efbec6887cafc26730 | |
parent | 70524c550322948765415d9b0eb29ac766e32e79 (diff) | |
download | libvfio-user-6ec31642f6253f5c19187c1ffb396d5921138b67.zip libvfio-user-6ec31642f6253f5c19187c1ffb396d5921138b67.tar.gz libvfio-user-6ec31642f6253f5c19187c1ffb396d5921138b67.tar.bz2 |
re-work access handling (#220)
Various cleanups and fixes to handling of region accesses, including:
- there should be no reason for us to split accesses into 1/2/4/8 byte accesses:
in general, the client will have already be doing that, and if not, there's no
particular reason we should be the ones to split up such larger accesses.
- use a callback for PCI config space reads and writes if one is provided (needs
more work for capabilities)
Signed-off-by: John Levon <john.levon@nutanix.com>
Reviewed-by: Swapnil Ingle <swapnil.ingle@nutanix.com>
Reviewed-by: Thanos Makatos <thanos.makatos@nutanix.com>
-rw-r--r-- | include/libvfio-user.h | 76 | ||||
-rw-r--r-- | lib/cap.c | 4 | ||||
-rw-r--r-- | lib/libvfio-user.c | 501 | ||||
-rw-r--r-- | lib/migration.c | 19 | ||||
-rw-r--r-- | lib/migration.h | 5 | ||||
-rw-r--r-- | lib/pci.c | 148 | ||||
-rw-r--r-- | lib/pci.h | 11 | ||||
-rw-r--r-- | lib/private.h | 29 | ||||
-rw-r--r-- | test/unit-tests.c | 44 |
9 files changed, 367 insertions, 470 deletions
diff --git a/include/libvfio-user.h b/include/libvfio-user.h index 61f94ca..d74087e 100644 --- a/include/libvfio-user.h +++ b/include/libvfio-user.h @@ -200,37 +200,18 @@ typedef ssize_t (vfu_region_access_cb_t)(vfu_ctx_t *vfu_ctx, char *buf, #define VFU_REGION_FLAG_READ (1 << 0) #define VFU_REGION_FLAG_WRITE (1 << 1) -#define VFU_REGION_FLAG_MMAP (1 << 2) // TODO: how this relates to IO bar? #define VFU_REGION_FLAG_RW (VFU_REGION_FLAG_READ | VFU_REGION_FLAG_WRITE) -#define VFU_REGION_FLAG_MEM (1 << 3) // if unset, bar is IO +#define VFU_REGION_FLAG_MEM (1 << 2) // if unset, bar is IO /** - * Set up a region. + * Set up a device region. * - * If this is the PCI configuration space, the @size argument is ignored. The - * size of the region is determined by the PCI type (set when the libvfio-user - * context is created). Accesses to the PCI configuration space header and the - * PCI capabilities are handled internally; the user supplied callback is not - * called. + * A region is an area of device memory that can be accessed by the client, + * either via VFIO_USER_REGION_READ/WRITE, or directly by mapping the region + * into the client's address space if an fd is given. * - * @vfu_ctx: the libvfio-user context - * @region_idx: region index - * @size: size of the region - * @region_access: callback function to access region. If the region is memory - * mappable and the client accesses the region or part of sparse area, then - * the callback is not called. - * @flags: region flags (VFU_REGION_FLAG_) - * @mmap_areas: array of memory mappable areas. This array provides to the - * server greater control of which specific areas should be memory mapped by - * the client. Each element in the @mmap_areas array describes one such area. - * Ignored if @nr_mmap_areas is 0 or if the region is not memory mappable. - * @nr_mmap_areas: number of sparse areas in @mmap_areas. Must be 0 if the - * region is not memory mappable. - * @fd: file descriptor of the file backing the region if it's a mappable - * region. It is the server's responsibility to create a file suitable for - * memory mapping by the client. Ignored if the region is not memory mappable. - * - * A note on memory-mappable regions: the client can memory map any part of the + * A mappable region can be split into mappable sub-areas according to the + * @mmap_areas array. Note that the client can memory map any part of the * file descriptor, even if not supposed to do so according to @mmap_areas. * There is no way in Linux to avoid this. * @@ -241,6 +222,35 @@ typedef ssize_t (vfu_region_access_cb_t)(vfu_ctx_t *vfu_ctx, char *buf, * descriptors of these DM targets. This is something we can document and * demonstrate in a sample. * + * Areas that are accessed via such a mapping by definition do not invoke any + * given callback. However, the callback can still be invoked, even on a + * mappable area, if the client chooses to call VFIO_USER_REGION_READ/WRITE. + * + * A VFU_PCI_DEV_CFG_REGION_IDX region, corresponding to PCI config space, has + * special handling: + * + * - the @size argument is ignored: the region size is always the size defined + * by the relevant PCI specification + * - all accesses to the standard PCI header (i.e. the first 64 bytes of the + * region) are handled by the library + * - all accesses to known PCI capabilities are handled by the library + * - if no callback is provided, reads to other areas are a simple memcpy(), + * and writes are an error + * - otherwise, the callback is expected to handle the access + * + * @vfu_ctx: the libvfio-user context + * @region_idx: region index + * @size: size of the region + * @region_access: callback function to access region + * @flags: region flags (VFU_REGION_FLAG_) + * @mmap_areas: array of memory mappable areas; must be provided if the region + * is mappable. + * @nr_mmap_areas: number of sparse areas in @mmap_areas; must be provided if + * the region is mappable. + * @fd: file descriptor of the file backing the region if the region is + * mappable; it is the server's responsibility to create a file suitable for + * memory mapping by the client. + * * @returns 0 on success, -1 on error, Sets errno. */ int @@ -490,20 +500,6 @@ void vfu_unmap_sg(vfu_ctx_t *vfu_ctx, const dma_sg_t *sg, struct iovec *iov, int cnt); -//FIXME: Remove if we dont need this. -/** - * Returns the PCI region given the position and size of an address span in the - * PCI configuration space. - * - * @pos: offset of the address span - * @count: size of the address span - * @off: output parameter that receives the relative offset within the region. - * - * Returns the PCI region (VFU_PCI_DEV_XXX_REGION_IDX), or -errno on error. - */ -int -vfu_get_region(loff_t pos, size_t count, loff_t *off); - /** * Read from the dma region exposed by the client. * @@ -83,9 +83,7 @@ cap_is_accessed(struct cap *caps, int nr_caps, size_t count, loff_t offset) } /* - * FIXME write starts before capabilities but extends into them. I don't - * think that the while loop in vfu_access will allow this in the first - * place. + * FIXME write starts before capabilities but extends into them. */ assert(false); } else if (offset > caps[nr_caps - 1].end) { diff --git a/lib/libvfio-user.c b/lib/libvfio-user.c index 4e9f58c..c0503ff 100644 --- a/lib/libvfio-user.c +++ b/lib/libvfio-user.c @@ -165,10 +165,11 @@ dev_get_caps(vfu_ctx_t *vfu_ctx, vfu_reg_info_t *vfu_reg, bool is_migr_reg, return 0; } -#ifdef VFU_VERBOSE_LOGGING -void -dump_buffer(const char *prefix, const char *buf, uint32_t count) +inline void +dump_buffer(const char *prefix UNUSED, const char *buf UNUSED, + uint32_t count UNUSED) { +#ifdef VFU_VERBOSE_LOGGING int i; const size_t bytes_per_line = 0x8; @@ -188,10 +189,8 @@ dump_buffer(const char *prefix, const char *buf, uint32_t count) if (i % bytes_per_line != 0) { fprintf(stderr, "\n"); } -} -#else -#define dump_buffer(prefix, buf, count) #endif +} static bool is_migr_reg(vfu_ctx_t *vfu_ctx, int index) @@ -199,238 +198,221 @@ is_migr_reg(vfu_ctx_t *vfu_ctx, int index) return &vfu_ctx->reg_info[index] == vfu_ctx->migr_reg; } -long -dev_get_reginfo(vfu_ctx_t *vfu_ctx, uint32_t index, uint32_t argsz, - struct vfio_region_info **vfio_reg, int **fds, size_t *nr_fds) +static ssize_t +region_access(vfu_ctx_t *vfu_ctx, size_t region_index, char *buf, + size_t count, uint64_t offset, bool is_write) { - vfu_reg_info_t *vfu_reg; - size_t caps_size; + ssize_t ret; assert(vfu_ctx != NULL); - assert(vfio_reg != NULL); + assert(buf != NULL); - vfu_reg = &vfu_ctx->reg_info[index]; + vfu_log(vfu_ctx, LOG_DEBUG, "%s %zu %#lx-%#lx", is_write ? "W" : "R", + region_index, offset, offset + count); - if (index >= vfu_ctx->nr_regions) { - vfu_log(vfu_ctx, LOG_DEBUG, "bad region index %d in get region info", - index); - return -EINVAL; - } - - if (argsz < sizeof(struct vfio_region_info)) { - vfu_log(vfu_ctx, LOG_DEBUG, "bad argsz %d", argsz); - return -EINVAL; + if (is_write) { + dump_buffer("buffer write", buf, count); } - /* - * TODO We assume that the client expects to receive argsz bytes. - */ - *vfio_reg = calloc(1, argsz); - if (!*vfio_reg) { - return -ENOMEM; - } - caps_size = get_vfio_caps_size(is_migr_reg(vfu_ctx, index), - vfu_reg->mmap_areas); - (*vfio_reg)->argsz = caps_size + sizeof(struct vfio_region_info); - (*vfio_reg)->flags = vfu_reg->flags; - (*vfio_reg)->index = index; - (*vfio_reg)->offset = region_to_offset((*vfio_reg)->index); - (*vfio_reg)->size = vfu_reg->size; + if (region_index == VFU_PCI_DEV_CFG_REGION_IDX) { + ret = pci_config_space_access(vfu_ctx, buf, count, offset, is_write); + } else if (is_migr_reg(vfu_ctx, region_index)) { + ret = migration_region_access(vfu_ctx, buf, count, offset, is_write); + } else { + vfu_region_access_cb_t *cb = vfu_ctx->reg_info[region_index].cb; - *nr_fds = 0; - if (caps_size > 0) { - if (vfu_reg->mmap_areas != NULL) { - (*vfio_reg)->flags |= VFIO_REGION_INFO_FLAG_CAPS; - } - if (argsz >= (*vfio_reg)->argsz) { - dev_get_caps(vfu_ctx, vfu_reg, is_migr_reg(vfu_ctx, index), - *vfio_reg, fds, nr_fds); + if (cb == NULL) { + vfu_log(vfu_ctx, LOG_ERR, "no callback for region %d", + region_index); + return -EINVAL; } + + ret = cb(vfu_ctx, buf, count, offset, is_write); } - vfu_log(vfu_ctx, LOG_DEBUG, "region_info[%d] offset %#llx flags %#x size %llu " - "argsz %u", - (*vfio_reg)->index, (*vfio_reg)->offset, (*vfio_reg)->flags, - (*vfio_reg)->size, (*vfio_reg)->argsz); + if (!is_write && (size_t)ret == count) { + dump_buffer("buffer read", buf, count); + } - return 0; + return ret; } -int -vfu_get_region(loff_t pos, size_t count, loff_t *off) +static bool +is_valid_region_access(vfu_ctx_t *vfu_ctx, size_t size, uint16_t cmd, + struct vfio_user_region_access *ra) { - int r; + size_t index; + + assert(vfu_ctx != NULL); + assert(ra != NULL); - assert(off != NULL); + if (size < sizeof (*ra)) { + vfu_log(vfu_ctx, LOG_ERR, "message size too small (%d)", size); + return false; + } - r = offset_to_region(pos); - if ((int)offset_to_region(pos + count) != r) { - return -ENOENT; + if (cmd == VFIO_USER_REGION_WRITE && size - sizeof (*ra) != ra->count) { + vfu_log(vfu_ctx, LOG_ERR, "region write count too small: " + "expected %lu, got %u", size - sizeof (*ra), ra->count); + return false; } - *off = pos - region_to_offset(r); - return r; + index = ra->region; + + if (index >= vfu_ctx->nr_regions) { + vfu_log(vfu_ctx, LOG_ERR, "bad region index %u", index); + return false; + } + + // FIXME: need to audit later for wraparound + if (ra->offset + ra->count > vfu_ctx->reg_info[index].size) { + vfu_log(vfu_ctx, LOG_ERR, "out of bounds region access %#lx-%#lx " + "(size %#lx)", ra->offset, ra->offset + ra->count, + vfu_ctx->reg_info[index].size); + + return false; + } + + if (device_is_stopped_and_copying(vfu_ctx->migration) && + !is_migr_reg(vfu_ctx, index)) { + vfu_log(vfu_ctx, LOG_ERR, + "cannot access region %u while device in stop-and-copy state", + index); + return false; + } + + return true; } -static ssize_t -do_access(vfu_ctx_t *vfu_ctx, char *buf, uint8_t count, uint64_t pos, bool is_write) +static int +handle_region_access(vfu_ctx_t *vfu_ctx, uint32_t size, uint16_t cmd, + void **data, size_t *len, + struct vfio_user_region_access *ra) { - int idx; - loff_t offset; + ssize_t ret; + char *buf; assert(vfu_ctx != NULL); - assert(buf != NULL); - assert(count == 1 || count == 2 || count == 4 || count == 8); - - idx = vfu_get_region(pos, count, &offset); - if (idx < 0) { - vfu_log(vfu_ctx, LOG_ERR, "invalid region %d", idx); - return idx; - } + assert(data != NULL); + assert(ra != NULL); - if (idx < 0 || idx >= (int)vfu_ctx->nr_regions) { - vfu_log(vfu_ctx, LOG_ERR, "bad region %d", idx); + if (!is_valid_region_access(vfu_ctx, size, cmd, ra)) { return -EINVAL; } - if (idx == VFU_PCI_DEV_CFG_REGION_IDX) { - return pci_config_space_access(vfu_ctx, buf, count, offset, is_write); + if (ra->count == 0) { + return 0; } - if (is_migr_reg(vfu_ctx, idx)) { - if (offset + count > vfu_ctx->reg_info[idx].size) { - vfu_log(vfu_ctx, LOG_ERR, "read %#lx-%#lx past end of migration region (%#x)", - offset, offset + count - 1, - vfu_ctx->reg_info[idx].size); - return -EINVAL; - } - return handle_migration_region_access(vfu_ctx, vfu_ctx->migration, - buf, count, offset, is_write); + *len = sizeof (*ra); + if (cmd == VFIO_USER_REGION_READ) { + *len += ra->count; + } + *data = calloc(1, *len); + if (*data == NULL) { + return -ENOMEM; + } + if (cmd == VFIO_USER_REGION_READ) { + buf = (char *)(((struct vfio_user_region_access*)(*data)) + 1); + } else { + buf = (char *)(ra + 1); } - /* - * Checking whether a callback exists might sound expensive however this - * code is not performance critical. This works well when we don't expect a - * region to be used, so the user of the library can simply leave the - * callback NULL in vfu_create_ctx. - */ - if (vfu_ctx->reg_info[idx].fn != NULL) { - return vfu_ctx->reg_info[idx].fn(vfu_ctx, buf, count, offset, is_write); + ret = region_access(vfu_ctx, ra->region, buf, ra->count, ra->offset, + cmd == VFIO_USER_REGION_WRITE); + + if (ret != ra->count) { + vfu_log(vfu_ctx, LOG_ERR, "failed to %s %#x-%#lx: %d", + cmd == VFIO_USER_REGION_WRITE ? "write" : "read", + ra->count, ra->offset + ra->count - 1, ret); + /* FIXME we should return whatever has been accessed, not an error */ + if (ret >= 0) { + ret = -EINVAL; + } + return ret; } - vfu_log(vfu_ctx, LOG_ERR, "no callback for region %d", idx); + ra = *data; + ra->count = ret; - return -EINVAL; + return 0; } -/* - * Returns the number of bytes processed on success or a negative number on - * error. - * - * TODO function naming, general cleanup of access path - * FIXME we must be able to return values up to uint32_t bit, or negative on - * error. Better to make return value an int and return the number of bytes - * processed via an argument. - */ -static ssize_t -_vfu_access(vfu_ctx_t *vfu_ctx, char *buf, uint32_t count, uint64_t *ppos, - bool is_write) -{ - uint32_t done = 0; - int ret; - - assert(vfu_ctx != NULL); - /* buf and ppos can be NULL if count is 0 */ +#define VFU_REGION_SHIFT 40 - while (count) { - size_t size; - /* - * Limit accesses to qword and enforce alignment. Figure out whether - * the PCI spec requires this - * FIXME while this makes sense for registers, we might be able to relax - * this requirement and make some transfers more efficient. Maybe make - * this a per-region option that can be set by the user? - */ - if (count >= 8 && !(*ppos % 8)) { - size = 8; - } else if (count >= 4 && !(*ppos % 4)) { - size = 4; - } else if (count >= 2 && !(*ppos % 2)) { - size = 2; - } else { - size = 1; - } - ret = do_access(vfu_ctx, buf, size, *ppos, is_write); - if (ret <= 0) { - vfu_log(vfu_ctx, LOG_ERR, "failed to %s %#lx-%#lx: %s", - is_write ? "write to" : "read from", *ppos, *ppos + size - 1, - strerror(-ret)); - /* - * TODO if ret < 0 then it might contain a legitimate error code, why replace it with EFAULT? - */ - return -EFAULT; - } - if (ret != (int)size) { - vfu_log(vfu_ctx, LOG_DEBUG, "bad read %d != %ld", ret, size); - } - count -= size; - done += size; - *ppos += size; - buf += size; - } - return done; +static inline uint64_t +region_to_offset(uint32_t region) +{ + return (uint64_t)region << VFU_REGION_SHIFT; } -static inline int -vfu_access(vfu_ctx_t *vfu_ctx, bool is_write, char *rwbuf, uint32_t count, - uint64_t *pos) +long +dev_get_reginfo(vfu_ctx_t *vfu_ctx, uint32_t index, uint32_t argsz, + struct vfio_region_info **vfio_reg, int **fds, size_t *nr_fds) { - uint32_t processed = 0, _count; - int ret; + vfu_reg_info_t *vfu_reg; + size_t caps_size; assert(vfu_ctx != NULL); - assert(rwbuf != NULL); - assert(pos != NULL); + assert(vfio_reg != NULL); - vfu_log(vfu_ctx, LOG_DEBUG, "%s %#lx-%#lx", is_write ? "W" : "R", *pos, - *pos + count - 1); + vfu_reg = &vfu_ctx->reg_info[index]; -#ifdef VFU_VERBOSE_LOGGING - if (is_write) { - dump_buffer("buffer write", rwbuf, count); + if (index >= vfu_ctx->nr_regions) { + vfu_log(vfu_ctx, LOG_DEBUG, "bad region index %d in get region info", + index); + return -EINVAL; } -#endif - _count = count; - - if (pci_is_hdr_access(*pos)) { - ret = pci_hdr_access(vfu_ctx, &_count, pos, is_write, rwbuf); - if (ret != 0) { - /* FIXME shouldn't we fail here? */ - vfu_log(vfu_ctx, LOG_ERR, "failed to access PCI header: %s", - strerror(-ret)); -#ifdef VFU_VERBOSE_LOGGING - dump_buffer("buffer write", rwbuf, _count); -#endif - } + if (argsz < sizeof(struct vfio_region_info)) { + vfu_log(vfu_ctx, LOG_DEBUG, "bad argsz %d", argsz); + return -EINVAL; } /* - * count is how much has been processed by pci_hdr_access, - * _count is how much there's left to be processed by vfu_access + * TODO We assume that the client expects to receive argsz bytes. */ - processed = count - _count; - ret = _vfu_access(vfu_ctx, rwbuf + processed, _count, pos, is_write); - if (ret >= 0) { - ret += processed; -#ifdef VFU_VERBOSE_LOGGING - if (!is_write && err == ret) { - dump_buffer("buffer read", rwbuf, ret); + *vfio_reg = calloc(1, argsz); + if (!*vfio_reg) { + return -ENOMEM; + } + caps_size = get_vfio_caps_size(is_migr_reg(vfu_ctx, index), + vfu_reg->mmap_areas); + (*vfio_reg)->argsz = caps_size + sizeof(struct vfio_region_info); + (*vfio_reg)->index = index; + (*vfio_reg)->offset = region_to_offset((*vfio_reg)->index); + (*vfio_reg)->size = vfu_reg->size; + + (*vfio_reg)->flags = 0; + + if (vfu_reg->flags & VFU_REGION_FLAG_READ) { + (*vfio_reg)->flags |= VFIO_REGION_INFO_FLAG_READ; + } + if (vfu_reg->flags & VFU_REGION_FLAG_WRITE) { + (*vfio_reg)->flags |= VFIO_REGION_INFO_FLAG_WRITE; + } + + if (vfu_reg->fd != -1) { + (*vfio_reg)->flags |= VFIO_REGION_INFO_FLAG_MMAP; + } + + *nr_fds = 0; + if (caps_size > 0) { + if (vfu_reg->mmap_areas != NULL) { + (*vfio_reg)->flags |= VFIO_REGION_INFO_FLAG_CAPS; + } + if (argsz >= (*vfio_reg)->argsz) { + dev_get_caps(vfu_ctx, vfu_reg, is_migr_reg(vfu_ctx, index), + *vfio_reg, fds, nr_fds); } -#endif } - return ret; + vfu_log(vfu_ctx, LOG_DEBUG, "region_info[%d] offset %#llx flags %#x size %llu " + "argsz %u", + (*vfio_reg)->index, (*vfio_reg)->offset, (*vfio_reg)->flags, + (*vfio_reg)->size, (*vfio_reg)->argsz); + + return 0; } /* TODO merge with dev_get_reginfo */ @@ -595,96 +577,6 @@ handle_device_reset(vfu_ctx_t *vfu_ctx) } static int -validate_region_access(vfu_ctx_t *vfu_ctx, uint32_t size, uint16_t cmd, - struct vfio_user_region_access *region_access) -{ - assert(region_access != NULL); - - if (size < sizeof *region_access) { - vfu_log(vfu_ctx, LOG_ERR, "message size too small (%d)", size); - return -EINVAL; - } - - if (region_access->region > vfu_ctx->nr_regions || region_access->count <= 0) { - vfu_log(vfu_ctx, LOG_ERR, "bad region %d and/or count %d", - region_access->region, region_access->count); - return -EINVAL; - } - - if (device_is_stopped_and_copying(vfu_ctx->migration) && - !is_migr_reg(vfu_ctx, region_access->region)) { - vfu_log(vfu_ctx, LOG_ERR, - "cannot access region %d while device in stop-and-copy state", - region_access->region); - return -EINVAL; - } - - if (cmd == VFIO_USER_REGION_WRITE && - size - sizeof *region_access != region_access->count) - { - vfu_log(vfu_ctx, LOG_ERR, "bad region access, expected %lu, actual %d", - size - sizeof *region_access, region_access->count); - return -EINVAL; - } - - return 0; -} - -static int -handle_region_access(vfu_ctx_t *vfu_ctx, uint32_t size, uint16_t cmd, - void **data, size_t *len, - struct vfio_user_region_access *region_access) -{ - uint64_t count, offset; - int ret; - char *buf; - - assert(vfu_ctx != NULL); - assert(data != NULL); - assert(region_access != NULL); - - ret = validate_region_access(vfu_ctx, size, cmd, region_access); - if (ret < 0) { - return ret; - } - - *len = sizeof *region_access; - if (cmd == VFIO_USER_REGION_READ) { - *len += region_access->count; - } - *data = calloc(1, *len); - if (*data == NULL) { - return -ENOMEM; - } - if (cmd == VFIO_USER_REGION_READ) { - buf = (char*)(((struct vfio_user_region_access*)(*data)) + 1); - } else { - buf = (char*)(region_access + 1); - } - - count = region_access->count; - offset = region_to_offset(region_access->region) + region_access->offset; - - ret = vfu_access(vfu_ctx, cmd == VFIO_USER_REGION_WRITE, buf, count, &offset); - if (ret != (int)region_access->count) { - vfu_log(vfu_ctx, LOG_ERR, "failed to %s %#x-%#lx: %d", - cmd == VFIO_USER_REGION_WRITE ? "write" : "read", - region_access->count, - region_access->offset + region_access->count - 1, ret); - /* FIXME we should return whatever has been accessed, not an error */ - if (ret >= 0) { - ret = -EINVAL; - } - return ret; - } - - region_access = *data; - region_access->count = ret; - - return 0; -} - -static int handle_dirty_pages_get(vfu_ctx_t *vfu_ctx, struct iovec **iovecs, size_t *nr_iovecs, struct vfio_iommu_type1_dirty_bitmap_get *ranges, @@ -1341,16 +1233,12 @@ copy_sparse_mmap_areas(vfu_reg_info_t *reg_info, static int setup_sparse_areas(vfu_reg_info_t *r, struct iovec *mmap_areas, - uint32_t nr_mmap_areas, int fd) + uint32_t nr_mmap_areas) { int ret, i; assert(r != NULL); - if (fd == -1) { - return -EBADF; - } - r->fd = fd; ret = copy_sparse_mmap_areas(r, mmap_areas, nr_mmap_areas); if (ret < 0) { return ret; @@ -1367,39 +1255,48 @@ setup_sparse_areas(vfu_reg_info_t *r, struct iovec *mmap_areas, int vfu_setup_region(vfu_ctx_t *vfu_ctx, int region_idx, size_t size, - vfu_region_access_cb_t *region_access, int flags, - struct iovec *mmap_areas, uint32_t nr_mmap_areas, - int fd) + vfu_region_access_cb_t *cb, int flags, + struct iovec *mmap_areas, uint32_t nr_mmap_areas, int fd) { + vfu_reg_info_t *reg; int ret; assert(vfu_ctx != NULL); - switch(region_idx) { - case VFU_PCI_DEV_BAR0_REGION_IDX ... VFU_PCI_DEV_VGA_REGION_IDX: - // Validate the config region provided. - if (region_idx == VFU_PCI_DEV_CFG_REGION_IDX && - flags != VFU_REGION_FLAG_RW) { - return ERROR(EINVAL); - } + if ((mmap_areas == NULL) != (nr_mmap_areas == 0) || + (mmap_areas == NULL) != (fd == -1)) { + vfu_log(vfu_ctx, LOG_ERR, "invalid mappable region arguments"); + return ERROR(EINVAL); + } - vfu_ctx->reg_info[region_idx].flags = flags; - vfu_ctx->reg_info[region_idx].size = size; - vfu_ctx->reg_info[region_idx].fn = region_access; + if (region_idx < VFU_PCI_DEV_BAR0_REGION_IDX || + region_idx > VFU_PCI_DEV_VGA_REGION_IDX) { + vfu_log(vfu_ctx, LOG_ERR, "invalid region index %d", region_idx); + return ERROR(EINVAL); + } - if (nr_mmap_areas > 0) { - ret = setup_sparse_areas(&vfu_ctx->reg_info[region_idx], mmap_areas, - nr_mmap_areas, fd); - if (ret < 0) { - return ERROR(-ret); - } - } - break; - default: - vfu_log(vfu_ctx, LOG_ERR, "Invalid region index %d", region_idx); + /* + * PCI config space is never mappable or of type mem. + */ + if (region_idx == VFU_PCI_DEV_CFG_REGION_IDX && + flags != VFU_REGION_FLAG_RW) { return ERROR(EINVAL); } + reg = &vfu_ctx->reg_info[region_idx]; + + reg->flags = flags; + reg->size = size; + reg->cb = cb; + reg->fd = fd; + + if (nr_mmap_areas > 0) { + ret = setup_sparse_areas(reg, mmap_areas, nr_mmap_areas); + if (ret < 0) { + return ERROR(-ret); + } + } + return 0; } diff --git a/lib/migration.c b/lib/migration.c index 29c667b..91fd09f 100644 --- a/lib/migration.c +++ b/lib/migration.c @@ -36,6 +36,7 @@ #include "common.h" #include "migration.h" +#include "private.h" enum migr_iter_state { VFIO_USER_MIGR_ITER_STATE_INITIAL, @@ -380,10 +381,10 @@ handle_data_size(vfu_ctx_t *vfu_ctx, struct migration *migr, } static ssize_t -handle_region_access_registers(vfu_ctx_t *vfu_ctx, struct migration *migr, - char *buf, size_t count, - loff_t pos, bool is_write) +migration_region_access_registers(vfu_ctx_t *vfu_ctx, char *buf, size_t count, + loff_t pos, bool is_write) { + struct migration *migr = vfu_ctx->migration; int ret; assert(migr != NULL); @@ -430,18 +431,18 @@ handle_region_access_registers(vfu_ctx_t *vfu_ctx, struct migration *migr, } ssize_t -handle_migration_region_access(vfu_ctx_t *vfu_ctx, struct migration *migr, - char *buf, size_t count, - loff_t pos, bool is_write) +migration_region_access(vfu_ctx_t *vfu_ctx, char *buf, size_t count, + loff_t pos, bool is_write) { - int ret = -EINVAL; + struct migration *migr = vfu_ctx->migration; + ssize_t ret = -EINVAL; assert(migr != NULL); assert(buf != NULL); if (pos + count <= sizeof(struct vfio_device_migration_info)) { - ret = handle_region_access_registers(vfu_ctx, migr, buf, - count, pos, is_write); + ret = migration_region_access_registers(vfu_ctx, buf, count, + pos, is_write); } else { pos -= sizeof(struct vfio_device_migration_info); if (is_write) { diff --git a/lib/migration.h b/lib/migration.h index 6978828..f126c13 100644 --- a/lib/migration.h +++ b/lib/migration.h @@ -47,9 +47,8 @@ struct migration * init_migration(const vfu_migration_t * const vfu_migr, int *err); ssize_t -handle_migration_region_access(vfu_ctx_t *vfu_ctx, struct migration *migr, - char *buf, size_t count, - loff_t pos, bool is_write); +migration_region_access(vfu_ctx_t *vfu_ctx, char *buf, size_t count, + loff_t pos, bool is_write); bool migration_available(vfu_ctx_t *vfu_ctx); @@ -216,21 +216,18 @@ handle_erom_write(vfu_ctx_t *ctx, vfu_pci_config_space_t *pci, return 0; } -static inline int -pci_hdr_write(vfu_ctx_t *vfu_ctx, uint16_t offset, - const char *buf, size_t count) +static int +pci_hdr_write(vfu_ctx_t *vfu_ctx, vfu_pci_config_space_t *cfg_space, + const char *buf, size_t count, loff_t offset) { - vfu_pci_config_space_t *pci; int ret = 0; assert(vfu_ctx != NULL); assert(buf != NULL); - pci = vfu_pci_get_config_space(vfu_ctx); - switch (offset) { case PCI_COMMAND: - ret = handle_command_write(vfu_ctx, pci, buf, count); + ret = handle_command_write(vfu_ctx, cfg_space, buf, count); break; case PCI_STATUS: vfu_log(vfu_ctx, LOG_INFO, "write to status ignored\n"); @@ -240,12 +237,13 @@ pci_hdr_write(vfu_ctx_t *vfu_ctx, uint16_t offset, ret = -EINVAL; break; case PCI_INTERRUPT_LINE: - pci->hdr.intr.iline = buf[0]; - vfu_log(vfu_ctx, LOG_DEBUG, "ILINE=%0x\n", pci->hdr.intr.iline); + cfg_space->hdr.intr.iline = buf[0]; + vfu_log(vfu_ctx, LOG_DEBUG, "ILINE=%0x\n", cfg_space->hdr.intr.iline); break; case PCI_LATENCY_TIMER: - pci->hdr.mlt = (uint8_t)buf[0]; - vfu_log(vfu_ctx, LOG_INFO, "set to latency timer to %hhx\n", pci->hdr.mlt); + cfg_space->hdr.mlt = (uint8_t)buf[0]; + vfu_log(vfu_ctx, LOG_INFO, "set to latency timer to %hhx\n", + cfg_space->hdr.mlt); break; case PCI_BASE_ADDRESS_0: case PCI_BASE_ADDRESS_1: @@ -256,7 +254,7 @@ pci_hdr_write(vfu_ctx_t *vfu_ctx, uint16_t offset, pci_hdr_write_bar(vfu_ctx, BAR_INDEX(offset), buf); break; case PCI_ROM_ADDRESS: - ret = handle_erom_write(vfu_ctx, pci, buf, count); + ret = handle_erom_write(vfu_ctx, cfg_space, buf, count); break; default: vfu_log(vfu_ctx, LOG_INFO, "PCI config write %#x-%#lx not handled\n", @@ -265,84 +263,114 @@ pci_hdr_write(vfu_ctx_t *vfu_ctx, uint16_t offset, } #ifdef VFU_VERBOSE_LOGGING - dump_buffer("PCI header", (char*)pci->hdr.raw, 0xff); + dump_buffer("PCI header", (const char *)cfg_space->hdr.raw, 0xff); #endif return ret; } -/* - * @pci_hdr: the PCI header - * @reg_info: region info - * @rw: the command - * @write: whether this is a PCI header write - * @count: output parameter that receives the number of bytes read/written - */ -int -pci_hdr_access(vfu_ctx_t *vfu_ctx, uint32_t *count, - uint64_t *pos, bool is_write, char *buf) +static int +pci_hdr_access(vfu_ctx_t *vfu_ctx, char *buf, size_t *countp, + loff_t *offsetp, bool is_write) { - uint32_t _count; - loff_t _pos; + vfu_pci_config_space_t *cfg_space = vfu_pci_get_config_space(vfu_ctx); + size_t count; int err = 0; - assert(vfu_ctx != NULL); - assert(count != NULL); - assert(pos != NULL); - assert(buf != NULL); + assert(countp != NULL); + assert(offsetp != NULL); - _pos = *pos - region_to_offset(VFU_PCI_DEV_CFG_REGION_IDX); - _count = MIN(*count, PCI_STD_HEADER_SIZEOF - _pos); + count = MIN(*countp, (size_t)(PCI_STD_HEADER_SIZEOF - *offsetp)); if (is_write) { - err = pci_hdr_write(vfu_ctx, _pos, buf, _count); + err = pci_hdr_write(vfu_ctx, cfg_space, buf, count, *offsetp); } else { - memcpy(buf, vfu_pci_get_config_space(vfu_ctx)->hdr.raw + _pos, _count); + memcpy(buf, cfg_space->hdr.raw + *offsetp, count); } - *pos += _count; - *count -= _count; - return err; -} -static uint32_t -pci_config_space_size(vfu_ctx_t *vfu_ctx) -{ - return vfu_ctx->reg_info[VFU_PCI_DEV_CFG_REGION_IDX].size; + *countp -= count; + *offsetp += count; + + return err; } /* - * Accesses the non-standard part (offset >= 0x40) of the PCI configuration - * space. + * Special handler for config space: we handle all accesses to the standard PCI + * header, as well as to any capabilities. + * + * Outside of those areas, if a callback is specified for the region, we'll use + * that; otherwise, writes are not allowed, and reads are satisfied with + * memcpy(). + * + * Returns the number of bytes handled, or -errno on error. */ ssize_t pci_config_space_access(vfu_ctx_t *vfu_ctx, char *buf, size_t count, - loff_t pos, bool is_write) + loff_t offset, bool is_write) { - int ret; + vfu_region_access_cb_t *cb; + loff_t start = offset; + ssize_t ret; + int rc; + + assert(vfu_ctx != NULL); + + cb = vfu_ctx->reg_info[VFU_PCI_DEV_CFG_REGION_IDX].cb; + + if (offset < PCI_STD_HEADER_SIZEOF) { + rc = pci_hdr_access(vfu_ctx, buf, &count, &offset, is_write); + if (rc < 0) { + vfu_log(vfu_ctx, LOG_ERR, "failed to access PCI header: %s", + strerror(-rc)); + dump_buffer("buffer write", buf, count); + return rc; + } + } + + if (count == 0) { + return offset - start; + } - count = MIN(pci_config_space_size(vfu_ctx), count); if (is_write) { - ret = cap_maybe_access(vfu_ctx, vfu_ctx->pci.caps, buf, count, pos); - if (ret < 0) { + rc = cap_maybe_access(vfu_ctx, vfu_ctx->pci.caps, buf, count, offset); + + if (rc < 0) { vfu_log(vfu_ctx, LOG_ERR, "bad access to capabilities %#lx-%#lx\n", - pos, pos + count); - return ret; + offset, offset + count); + return rc; + } else if (rc > 0) { + offset += count; + return offset - start; } + + if (cb == NULL) { + vfu_log(vfu_ctx, LOG_ERR, "config space write: no callback"); + return -EINVAL; + } + } else { - /* - * It is legitimate to read the non-standard part of the PCI config - * space even if there are no capabilities. - */ - memcpy(buf, vfu_ctx->pci.config_space->raw + pos, count); + if (cb == NULL) { + memcpy(buf, vfu_ctx->pci.config_space->raw + offset, count); + offset += count; + return offset - start; + } } - return count; + + ret = cb(vfu_ctx, buf, count, offset, is_write); + offset += ret; + + if (ret < 0) { + return ret; + } + + return offset - start; } int vfu_pci_init(vfu_ctx_t *vfu_ctx, vfu_pci_type_t pci_type, int hdr_type, int revision UNUSED) { - vfu_pci_config_space_t *config_space; + vfu_pci_config_space_t *cfg_space; size_t size; assert(vfu_ctx != NULL); @@ -377,13 +405,13 @@ vfu_pci_init(vfu_ctx_t *vfu_ctx, vfu_pci_type_t pci_type, } // Allocate a buffer for the config space. - config_space = calloc(1, size); - if (config_space == NULL) { + cfg_space = calloc(1, size); + if (cfg_space == NULL) { return ERROR(ENOMEM); } vfu_ctx->pci.type = pci_type; - vfu_ctx->pci.config_space = config_space; + vfu_ctx->pci.config_space = cfg_space; vfu_ctx->reg_info[VFU_PCI_DEV_CFG_REGION_IDX].size = size; return 0; @@ -36,17 +36,6 @@ #include "libvfio-user.h" #include "private.h" -static inline bool -pci_is_hdr_access(uint64_t pos) -{ - const uint64_t off = region_to_offset(VFU_PCI_DEV_CFG_REGION_IDX); - return pos >= off && pos - off < PCI_STD_HEADER_SIZEOF; -} - -int -pci_hdr_access(vfu_ctx_t *vfu_ctx, uint32_t *count, - uint64_t *pos, bool is_write, char *buf); - ssize_t pci_config_space_access(vfu_ctx_t *vfu_ctx, char *buf, size_t count, loff_t pos, bool is_write); diff --git a/lib/private.h b/lib/private.h index 871ba29..a3b90f5 100644 --- a/lib/private.h +++ b/lib/private.h @@ -35,9 +35,6 @@ #include "dma.h" -#define VFU_REGION_SHIFT 40 -#define VFU_REGION_MASK ((1ULL << VFU_REGION_SHIFT) - 1) - static inline int ERROR(int err) { @@ -45,13 +42,6 @@ ERROR(int err) return -1; } -#ifdef VFU_VERBOSE_LOGGING -void -dump_buffer(const char *prefix, const char *buf, uint32_t count); -#else -#define dump_buffer(prefix, buf, count) -#endif - struct transport_ops { int (*init)(vfu_ctx_t*); int (*attach)(vfu_ctx_t*); @@ -99,7 +89,7 @@ typedef struct { * Note that the memory of the region is owned by the user, except for the * standard header (first 64 bytes) of the PCI configuration space. */ - vfu_region_access_cb_t *fn; + vfu_region_access_cb_t *cb; struct vfu_sparse_mmap_areas *mmap_areas; /* sparse mmap areas */ int fd; @@ -139,25 +129,12 @@ struct vfu_ctx { vfu_dev_type_t dev_type; }; -int -vfu_pci_hdr_access(vfu_ctx_t *vfu_ctx, uint32_t *count, - uint64_t *pos, bool write, char *buf); +void +dump_buffer(const char *prefix, const char *buf, uint32_t count); vfu_reg_info_t * vfu_get_region_info(vfu_ctx_t *vfu_ctx); -static inline uint64_t -region_to_offset(uint32_t region) -{ - return (uint64_t)region << VFU_REGION_SHIFT; -} - -static inline uint32_t -offset_to_region(uint64_t offset) -{ - return (offset >> VFU_REGION_SHIFT) & VFU_REGION_MASK; -} - int handle_dma_map_or_unmap(vfu_ctx_t *vfu_ctx, uint32_t size, bool map, int *fds, size_t nr_fds, diff --git a/test/unit-tests.c b/test/unit-tests.c index 6fb89a4..30cc71c 100644 --- a/test/unit-tests.c +++ b/test/unit-tests.c @@ -471,24 +471,25 @@ test_get_region_info(UNUSED void **state) dev_get_reginfo(&vfu_ctx, index, argsz, &vfio_reg, &fds, &nr_fds)); assert_int_equal(sizeof(struct vfio_region_info), vfio_reg->argsz); - assert_int_equal(VFU_REGION_FLAG_RW, vfio_reg->flags); + assert_int_equal(VFIO_REGION_INFO_FLAG_READ | VFIO_REGION_INFO_FLAG_WRITE | + VFIO_REGION_INFO_FLAG_MMAP, vfio_reg->flags); assert_int_equal(1, vfio_reg->index); - assert_int_equal(0x10000000000, region_to_offset(vfio_reg->index)); + assert_int_equal(0x10000000000, vfio_reg->offset); assert_int_equal(0xdeadbeef, vfio_reg->size); assert_int_equal(0, nr_fds); /* regions caps (sparse mmap) but argsz too small */ - mmap_areas->nr_mmap_areas = 1; + mmap_areas->nr_mmap_areas = 1; mmap_areas->areas[0].iov_base = (void*)0x8badf00d; mmap_areas->areas[0].iov_len = 0x0d15ea5e; vfu_ctx.reg_info[1].mmap_areas = mmap_areas; - vfu_ctx.reg_info[1].flags |= VFIO_REGION_INFO_FLAG_MMAP; assert_int_equal(0, dev_get_reginfo(&vfu_ctx, index, argsz, &vfio_reg, &fds, &nr_fds)); assert_int_equal(argsz + sizeof(struct vfio_region_info_cap_sparse_mmap) + sizeof(struct vfio_region_sparse_mmap_area), vfio_reg->argsz); - assert_int_equal(VFU_REGION_FLAG_RW | VFIO_REGION_INFO_FLAG_MMAP | VFIO_REGION_INFO_FLAG_CAPS, + assert_int_equal(VFIO_REGION_INFO_FLAG_READ | VFIO_REGION_INFO_FLAG_WRITE | + VFIO_REGION_INFO_FLAG_MMAP | VFIO_REGION_INFO_FLAG_CAPS, vfio_reg->flags); assert_int_equal(0, nr_fds); @@ -668,25 +669,36 @@ test_setup_sparse_region(void **state __attribute__((unused))) .iov_len = 0x1000 } }; + int ret; - /* bad fd */ - assert_int_equal(-1, - vfu_setup_region(&vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX, - 0x2000, NULL, 0, mmap_areas, 2, -1)); - assert_int_equal(EBADF, errno); + /* invalid mappable settings */ + ret = vfu_setup_region(&vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX, + 0x2000, NULL, 0, mmap_areas, 2, -1); + assert_int_equal(-1, ret); + assert_int_equal(EINVAL, errno); + + ret = vfu_setup_region(&vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX, + 0x2000, NULL, 0, NULL, 0, 1); + assert_int_equal(-1, ret); + assert_int_equal(EINVAL, errno); + + ret = vfu_setup_region(&vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX, + 0x2000, NULL, 0, mmap_areas, 0, 1); + assert_int_equal(-1, ret); + assert_int_equal(EINVAL, errno); /* sparse region exceeds region size */ mmap_areas[1].iov_len = 0x1001; - assert_int_equal(-1, - vfu_setup_region(&vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX, - 0x2000, NULL, 0, mmap_areas, 2, 0)); + ret = vfu_setup_region(&vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX, + 0x2000, NULL, 0, mmap_areas, 2, 0); + assert_int_equal(-1, ret); assert_int_equal(EINVAL, errno); /* sparse region within region size */ mmap_areas[1].iov_len = 0x1000; - assert_int_equal(0, - vfu_setup_region(&vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX, - 0x2000, NULL, 0, mmap_areas, 2, 0)); + ret = vfu_setup_region(&vfu_ctx, VFU_PCI_DEV_BAR0_REGION_IDX, + 0x2000, NULL, 0, mmap_areas, 2, 0); + assert_int_equal(0, ret); } static void |