From 14b237fc80473bf4419a2bccda4cf7e7d382c174 Mon Sep 17 00:00:00 2001 From: John Levon Date: Wed, 31 Mar 2021 16:49:52 +0100 Subject: rework DMA callbacks (#396) This fixes a number of issues with how DMA is handled, based on some changes by Thanos Makatos: - rename callbacks to register/unregister, as there is not necessarily any mapping - provide the (large) page-aligned mapped start and size, the page size used, as well as the protection flags: some API users need these - for convenience, provide the virtual address separately that corresponds to the mapped region - we should only require a DMA controller to use vfu_addr_to_sg(), not an unregister callback - the callbacks should return errno not -errno - region removal was incorrectly updating the region array - various other cleanups and clarifications Signed-off-by: John Levon Reviewed-by: Thanos Makatos --- include/libvfio-user.h | 118 +++++++++++++------ lib/dma.c | 276 +++++++++++++++++++++++---------------------- lib/dma.h | 108 ++++++------------ lib/irq.c | 4 +- lib/libvfio-user.c | 107 +++++++++--------- lib/private.h | 36 ++++-- samples/gpio-pci-idio-16.c | 21 ++-- samples/server.c | 46 ++++---- test/mocks.c | 42 ++++--- test/mocks.h | 12 +- test/unit-tests.c | 210 +++++++++++++++++++--------------- 11 files changed, 514 insertions(+), 466 deletions(-) diff --git a/include/libvfio-user.h b/include/libvfio-user.h index 7b9532b..1048236 100644 --- a/include/libvfio-user.h +++ b/include/libvfio-user.h @@ -60,12 +60,12 @@ extern "C" { #define VFU_DMA_REGIONS 0x10 -// FIXME: too common a name? -typedef uint64_t dma_addr_t; +/* DMA addresses cannot be directly de-referenced. */ +typedef void *vfu_dma_addr_t; typedef struct { - dma_addr_t dma_addr; - int region; /* TODO replace region and length with struct iovec */ + vfu_dma_addr_t *dma_addr; + int region; int length; uint64_t offset; bool mappable; @@ -313,40 +313,95 @@ int vfu_setup_device_reset_cb(vfu_ctx_t *vfu_ctx, vfu_reset_cb_t *reset); /* - * Function that is called when the guest maps a DMA region. Optional. + * Info for a guest DMA region. @iova is always valid; the other parameters + * will only be set if the guest DMA region is VFIO_USER_F_DMA_REGION_MAPPABLE, + * and a file descriptor has been passed across the vfio-user socket. + * + * @iova: guest DMA range. This is the guest physical range (as we don't + * support vIOMMU) that the guest registers for DMA, via a VFIO_USER_DMA_MAP + * message, and is the address space used as input to vfu_addr_to_sg(). + * @vaddr: if the range is mapped into this process, this is the virtual address + * of the start of the region. + * @mapping: if @vaddr is non-NULL, this range represents the actual range + * mmap()ed into the process. This might be (large) page aligned, and + * therefore be different from @vaddr + @iova.iov_len. + * @page_size: if @vaddr is non-NULL, page size of the mapping (e.g. 2MB) + * @prot: if @vaddr is non-NULL, protection settings of the mapping as per + * mmap(2) + * + * For a real example, using the gpio sample server, and a qemu configured to + * use huge pages and share its memory: + * + * gpio: mapped DMA region iova=[0xf0000-0x10000000) vaddr=0x2aaaab0f0000 + * page_size=0x200000 mapping=[0x2aaaab000000-0x2aaabb000000) + * + * 0xf0000 0x10000000 + * | | + * v v + * +-----------------------------------+ + * | Guest IOVA (DMA) space | + * +--+-----------------------------------+--+ + * | | | | + * | +-----------------------------------+ | + * | ^ libvfio-user server address space | + * +--|--------------------------------------+ + * ^ vaddr=0x2aaaab0f0000 ^ + * | | + * 0x2aaaab000000 0x2aaabb000000 + * + * This region can be directly accessed at 0x2aaaab0f0000, but the underlying + * large page mapping is in the range [0x2aaaab000000-0x2aaabb000000). + */ +typedef struct vfu_dma_info { + struct iovec iova; + void *vaddr; + struct iovec mapping; + size_t page_size; + uint32_t prot; +} vfu_dma_info_t; + +/* + * Called when a guest registers one of its DMA regions via a VFIO_USER_DMA_MAP + * message. * * @vfu_ctx: the libvfio-user context - * @iova: iova address - * @len: length - * @prot: memory protection used to map region as defined in + * @info: the DMA info */ -typedef void (vfu_map_dma_cb_t)(vfu_ctx_t *vfu_ctx, - uint64_t iova, uint64_t len, uint32_t prot); +typedef void (vfu_dma_register_cb_t)(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info); /* - * Function that is called when the guest unmaps a DMA region. The device + * Function that is called when the guest unregisters a DMA region. The device * must release all references to that region before the callback returns. - * This is required if you want to be able to access guest memory. + * This is required if you want to be able to access guest memory directly via + * a mapping. + * + * The callback should return 0 on success, errno on failure (although + * unregister should not fail: this will not stop a guest from unregistering the + * region). * * @vfu_ctx: the libvfio-user context - * @iova: iova address - * @len: length + * @info: the DMA info */ -typedef int (vfu_unmap_dma_cb_t)(vfu_ctx_t *vfu_ctx, - uint64_t iova, uint64_t len); +typedef int (vfu_dma_unregister_cb_t)(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info); /** - * Setup device DMA map/unmap callbacks. This will also enable bookkeeping of - * DMA regions received from client, otherwise they will be just acknowledged. + * Setup device DMA registration callbacks. When libvfio-user is notified of a + * DMA range addition or removal, these callbacks will be invoked. + * + * If this function is not called, guest DMA regions are not accessible via + * vfu_addr_to_sg(). + * + * To directly access this DMA memory via a local mapping with vfu_map_sg(), at + * least @dma_unregister must be provided. * * @vfu_ctx: the libvfio-user context - * @map_dma: DMA region map callback (optional) - * @unmap_dma: DMA region unmap callback (optional) + * @dma_register: DMA region registration callback (optional) + * @dma_unregister: DMA region unregistration callback (optional) */ int -vfu_setup_device_dma_cb(vfu_ctx_t *vfu_ctx, vfu_map_dma_cb_t *map_dma, - vfu_unmap_dma_cb_t *unmap_dma); +vfu_setup_device_dma(vfu_ctx_t *vfu_ctx, vfu_dma_register_cb_t *dma_register, + vfu_dma_unregister_cb_t *dma_unregister); enum vfu_dev_irq_type { VFU_DEV_INTX_IRQ, @@ -481,7 +536,7 @@ typedef struct { * to client accesses of the migration region; the migration region read/write * callbacks are not called after this function call. Offsets in callbacks are * relative to @data_offset. - * + * * @vfu_ctx: the libvfio-user context * @callbacks: migration callbacks * @data_offset: offset in the migration region where data begins. @@ -512,8 +567,8 @@ vfu_irq_trigger(vfu_ctx_t *vfu_ctx, uint32_t subindex); * than can be individually mapped in the program's virtual memory. A single * linear guest physical address span may need to be split into multiple * scatter/gather regions due to limitations of how memory can be mapped. - * Field unmap_dma must have been provided at context creation time in order - * to use this function. + * + * vfu_setup_device_dma() must have been called prior to using this function. * * @vfu_ctx: the libvfio-user context * @dma_addr: the guest physical address @@ -530,15 +585,16 @@ vfu_irq_trigger(vfu_ctx_t *vfu_ctx, uint32_t subindex); * entries necessary to complete this request (errno=0). */ int -vfu_addr_to_sg(vfu_ctx_t *vfu_ctx, dma_addr_t dma_addr, uint32_t len, +vfu_addr_to_sg(vfu_ctx_t *vfu_ctx, vfu_dma_addr_t dma_addr, size_t len, dma_sg_t *sg, int max_sg, int prot); /** * Maps a list scatter/gather entries from the guest's physical address space * to the program's virtual memory. It is the caller's responsibility to remove - * the mappings by calling vfu_unmap_sg. - * Field unmap_dma must have been provided at context creation time in order - * to use this function. + * the mappings by calling vfu_unmap_sg(). + * + * This is only supported when a @dma_unregister callback is provided to + * vfu_setup_device_dma(). * * @vfu_ctx: the libvfio-user context * @sg: array of scatter/gather entries returned by vfu_addr_to_sg @@ -553,10 +609,8 @@ vfu_map_sg(vfu_ctx_t *vfu_ctx, const dma_sg_t *sg, struct iovec *iov, int cnt); /** - * Unmaps a list scatter/gather entries (previously mapped by vfu_map_sg) from + * Unmaps a list scatter/gather entries (previously mapped by vfu_map_sg()) from * the program's virtual memory. - * Field unmap_dma must have been provided at context creation time in order - * to use this function. * * @vfu_ctx: the libvfio-user context * @sg: array of scatter/gather entries to unmap diff --git a/lib/dma.c b/lib/dma.c index c06d666..d3db436 100644 --- a/lib/dma.c +++ b/lib/dma.c @@ -92,54 +92,53 @@ dma_controller_create(vfu_ctx_t *vfu_ctx, int max_regions) } void -MOCK_DEFINE(_dma_controller_do_remove_region)(dma_controller_t *dma, - dma_memory_region_t *region) +MOCK_DEFINE(dma_controller_unmap_region)(dma_controller_t *dma, + dma_memory_region_t *region) { int err; assert(dma != NULL); assert(region != NULL); - err = dma_unmap_region(region, region->virt_addr, region->size); + err = munmap(region->info.mapping.iov_base, region->info.mapping.iov_len); if (err != 0) { - vfu_log(dma->vfu_ctx, LOG_DEBUG, "failed to unmap fd=%d vaddr=%p-%p\n", - region->fd, region->virt_addr, - region->virt_addr + region->size - 1); + vfu_log(dma->vfu_ctx, LOG_DEBUG, "failed to unmap fd=%d " + "mapping=[%p, %p): %m\n", + region->fd, region->info.mapping.iov_base, + iov_end(®ion->info.mapping)); } - if (region->fd != -1) { - if (close(region->fd) == -1) { - vfu_log(dma->vfu_ctx, LOG_DEBUG, - "failed to close fd %d: %m\n", region->fd); - } + + assert(region->fd != -1); + + if (close(region->fd) == -1) { + vfu_log(dma->vfu_ctx, LOG_WARNING, "failed to close fd %d: %m\n", + region->fd); } } -/* - * FIXME no longer used. Also, it doesn't work for addresses that span two - * DMA regions. - */ -bool -dma_controller_region_valid(dma_controller_t *dma, dma_addr_t dma_addr, - size_t size) +static void +array_remove(void *array, size_t elem_size, size_t index, int *nr_elemsp) { - dma_memory_region_t *region; - int i; + void *dest; + void *src; + size_t nr; - for (i = 0; i < dma->nregions; i++) { - region = &dma->regions[i]; - if (dma_addr == region->dma_addr && size <= region->size) { - return true; - } - } + assert((size_t)*nr_elemsp > index); + + nr = *nr_elemsp - (index + 1); + dest = (char *)array + (index * elem_size); + src = (char *)array + ((index + 1) * elem_size); + + memmove(dest, src, nr * elem_size); - return false; + (*nr_elemsp)--; } /* FIXME not thread safe */ int MOCK_DEFINE(dma_controller_remove_region)(dma_controller_t *dma, - dma_addr_t dma_addr, size_t size, - vfu_unmap_dma_cb_t *unmap_dma, + vfu_dma_addr_t dma_addr, size_t size, + vfu_dma_unregister_cb_t *dma_unregister, void *data) { int idx; @@ -150,29 +149,29 @@ MOCK_DEFINE(dma_controller_remove_region)(dma_controller_t *dma, for (idx = 0; idx < dma->nregions; idx++) { region = &dma->regions[idx]; - if (region->dma_addr != dma_addr || region->size != size) { + if (region->info.iova.iov_base != dma_addr || + region->info.iova.iov_len != size) { continue; } - err = unmap_dma(data, region->dma_addr, region->size); + + err = dma_unregister(data, ®ion->info); if (err != 0) { vfu_log(dma->vfu_ctx, LOG_ERR, - "failed to notify of removal of DMA region %#lx-%#lx: %s\n", - region->dma_addr, region->dma_addr + region->size, - strerror(-err)); + "failed to dma_unregister() DMA region [%#lx, %#lx): %s\n", + region->info.iova.iov_base, iov_end(®ion->info.iova), + strerror(err)); return err; } + assert(region->refcnt == 0); - if (region->virt_addr != NULL) { - _dma_controller_do_remove_region(dma, region); + + if (region->info.vaddr != NULL) { + dma_controller_unmap_region(dma, region); + } else { + assert(region->fd == -1); } - if (dma->nregions > 1) - /* - * FIXME valgrind complains with 'Source and destination overlap in memcpy', - * check whether memmove eliminates this warning. - */ - memcpy(region, &dma->regions[dma->nregions - 1], - sizeof(*region)); - dma->nregions--; + + array_remove(&dma->regions, sizeof (*region), idx, &dma->nregions); return 0; } return -ENOENT; @@ -183,15 +182,22 @@ dma_controller_remove_regions(dma_controller_t *dma) { int i; - assert(dma); + assert(dma != NULL); for (i = 0; i < dma->nregions; i++) { dma_memory_region_t *region = &dma->regions[i]; - vfu_log(dma->vfu_ctx, LOG_INFO, "unmap vaddr=%p IOVA=%lx", - region->virt_addr, region->dma_addr); + vfu_log(dma->vfu_ctx, LOG_DEBUG, "removing DMA region " + "iova=[%#lx, %#lx) vaddr=%#lx mapping=[%#lx, %#lx)", + region->info.iova.iov_base, iov_end(®ion->info.iova), + region->info.vaddr, + region->info.mapping.iov_base, iov_end(®ion->info.mapping)); - _dma_controller_do_remove_region(dma, region); + if (region->info.vaddr != NULL) { + dma_controller_unmap_region(dma, region); + } else { + assert(region->fd == -1); + } } } @@ -206,26 +212,65 @@ dma_controller_destroy(dma_controller_t *dma) free(dma); } +static int +dma_map_region(dma_controller_t *dma, dma_memory_region_t *region) +{ + void *mmap_base; + size_t mmap_len; + off_t offset; + + offset = ROUND_DOWN(region->offset, region->info.page_size); + mmap_len = ROUND_UP(region->info.iova.iov_len, region->info.page_size); + + mmap_base = mmap(NULL, mmap_len, region->info.prot, MAP_SHARED, + region->fd, offset); + + if (mmap_base == MAP_FAILED) { + return -errno; + } + + // Do not dump. + madvise(mmap_base, mmap_len, MADV_DONTDUMP); + + region->info.mapping.iov_base = mmap_base; + region->info.mapping.iov_len = mmap_len; + region->info.vaddr = mmap_base + (region->offset - offset); + + vfu_log(dma->vfu_ctx, LOG_DEBUG, "mapped DMA region iova=[%#lx, %#lx) " + "vaddr=%#lx page_size=%#lx mapping=[%#lx, %#lx)", + region->info.iova.iov_base, iov_end(®ion->info.iova), + region->info.vaddr, region->info.page_size, + region->info.mapping.iov_base, iov_end(®ion->info.mapping)); + + + return 0; +} + int MOCK_DEFINE(dma_controller_add_region)(dma_controller_t *dma, - dma_addr_t dma_addr, size_t size, + vfu_dma_addr_t dma_addr, size_t size, int fd, off_t offset, uint32_t prot) { - int idx; dma_memory_region_t *region; int page_size = 0; + char rstr[1024]; + int idx; + int ret; assert(dma != NULL); + snprintf(rstr, sizeof(rstr), "[%p, %p) fd=%d offset=%#lx prot=%#x", + dma_addr, (char *)dma_addr + size, fd, offset, prot); + for (idx = 0; idx < dma->nregions; idx++) { region = &dma->regions[idx]; /* First check if this is the same exact region. */ - if (region->dma_addr == dma_addr && region->size == size) { + if (region->info.iova.iov_base == dma_addr && + region->info.iova.iov_len == size) { if (offset != region->offset) { - vfu_log(dma->vfu_ctx, LOG_ERR, - "bad offset for new DMA region %#lx-%#lx, want=%ld, existing=%ld\n", - dma_addr, dma_addr + size, offset, region->offset); + vfu_log(dma->vfu_ctx, LOG_ERR, "bad offset for new DMA region " + "%s; existing=%#lx\n", rstr, region->offset); goto err; } if (!fds_are_same_file(region->fd, fd)) { @@ -235,36 +280,34 @@ MOCK_DEFINE(dma_controller_add_region)(dma_controller_t *dma, * the same file, however in the majority of cases we'll be * using a single fd. */ - vfu_log(dma->vfu_ctx, LOG_ERR, - "bad fd=%d for new DMA region %#lx-%#lx, existing fd=%d\n", - fd, offset, offset + size - 1, region->fd); + vfu_log(dma->vfu_ctx, LOG_ERR, "bad fd for new DMA region %s; " + "existing=%d\n", rstr, region->fd); goto err; } - if (region->prot != prot) { - vfu_log(dma->vfu_ctx, LOG_ERR, "bad prot=%#x " - "for new DMA region %#lx-%#lx, existing prot=%#x\n", - prot, offset, offset + size - 1, region->prot); + if (region->info.prot != prot) { + vfu_log(dma->vfu_ctx, LOG_ERR, "bad prot for new DMA region " + "%s; existing=%#x", rstr, region->info.prot); goto err; } return idx; } /* Check for overlap, i.e. start of one region is within another. */ - if ((dma_addr >= region->dma_addr && - dma_addr < region->dma_addr + region->size) || - (region->dma_addr >= dma_addr && - region->dma_addr < dma_addr + size)) { - vfu_log(dma->vfu_ctx, LOG_INFO, - "new DMA region %#lx+%#lx overlaps with DMA region %#lx-%#lx\n", - dma_addr, size, region->dma_addr, region->size); + if ((dma_addr >= region->info.iova.iov_base && + dma_addr < iov_end(®ion->info.iova)) || + (region->info.iova.iov_base >= dma_addr && + region->info.iova.iov_base < dma_addr + size)) { + vfu_log(dma->vfu_ctx, LOG_INFO, "new DMA region %s overlaps with " + "DMA region [%#lx, %#lx)", rstr, region->info.iova.iov_base, + iov_end(®ion->info.iova)); goto err; } } if (dma->nregions == dma->max_regions) { idx = dma->max_regions; - vfu_log(dma->vfu_ctx, LOG_ERR, - "reached maxed regions, recompile with higher number of DMA regions\n"); + vfu_log(dma->vfu_ctx, LOG_ERR, "hit max regions %d\n", + dma->max_regions); goto err; } @@ -280,32 +323,31 @@ MOCK_DEFINE(dma_controller_add_region)(dma_controller_t *dma, } page_size = MAX(page_size, getpagesize()); - region->dma_addr = dma_addr; - region->size = size; - region->page_size = page_size; + memset(region, 0, sizeof (*region)); + + region->info.iova.iov_base = (void *)dma_addr; + region->info.iova.iov_len = size; + region->info.page_size = page_size; + region->info.prot = prot; region->offset = offset; - region->prot = prot; region->fd = fd; - region->refcnt = 0; if (fd != -1) { - region->virt_addr = dma_map_region(region, region->prot, 0, - region->size); - if (region->virt_addr == MAP_FAILED) { + ret = dma_map_region(dma, region); + + if (ret != 0) { vfu_log(dma->vfu_ctx, LOG_ERR, - "failed to memory map DMA region %#lx-%#lx: %s\n", - dma_addr, dma_addr + size, strerror(errno)); - if (region->fd != -1) { - if (close(region->fd) == -1) { - vfu_log(dma->vfu_ctx, LOG_DEBUG, - "failed to close fd %d: %m\n", region->fd); - } + "failed to memory map DMA region %s: %s\n", rstr, + strerror(-ret)); + + if (close(region->fd) == -1) { + vfu_log(dma->vfu_ctx, LOG_WARNING, + "failed to close fd %d: %m\n", region->fd); } goto err; } - } else { - region->virt_addr = NULL; } + dma->nregions++; return idx; @@ -314,53 +356,9 @@ err: return -idx - 1; } -static inline void -mmap_round(size_t *offset, size_t *size, size_t page_size) -{ - size_t offset_orig = *offset; - *offset = ROUND_DOWN(offset_orig, page_size); - *size = ROUND_UP(offset_orig + *size, page_size) - *offset; -} - -void * -MOCK_DEFINE(dma_map_region)(dma_memory_region_t *region, int prot, - size_t offset, size_t len) -{ - size_t mmap_offset, mmap_size = len; - char *mmap_base; - - if (offset >= region->size || offset + len > region->size) { - return MAP_FAILED; - } - - offset += region->offset; - mmap_offset = offset; - mmap_round(&mmap_offset, &mmap_size, region->page_size); - - // Do the mmap. - // Note: As per mmap(2) manpage, on some hardware architectures - // (e.g., i386), PROT_WRITE implies PROT_READ - mmap_base = mmap(NULL, mmap_size, prot, MAP_SHARED, - region->fd, mmap_offset); - if (mmap_base == MAP_FAILED) { - return mmap_base; - } - // Do not dump. - madvise(mmap_base, mmap_size, MADV_DONTDUMP); - - return mmap_base + (offset - mmap_offset); -} - -int -dma_unmap_region(dma_memory_region_t *region, void *virt_addr, size_t len) -{ - mmap_round((size_t *)&virt_addr, &len, region->page_size); - return munmap(virt_addr, len); -} - int _dma_addr_sg_split(const dma_controller_t *dma, - dma_addr_t dma_addr, uint32_t len, + vfu_dma_addr_t dma_addr, uint32_t len, dma_sg_t *sg, int max_sg, int prot) { int idx; @@ -371,9 +369,10 @@ _dma_addr_sg_split(const dma_controller_t *dma, found = false; for (idx = 0; idx < dma->nregions; idx++) { const dma_memory_region_t *const region = &dma->regions[idx]; - const dma_addr_t region_end = region->dma_addr + region->size; + vfu_dma_addr_t region_start = region->info.iova.iov_base; + vfu_dma_addr_t region_end = iov_end(®ion->info.iova); - while (dma_addr >= region->dma_addr && dma_addr < region_end) { + while (dma_addr >= region_start && dma_addr < region_end) { size_t region_len = MIN(region_end - dma_addr, len); if (cnt < max_sg) { @@ -443,7 +442,10 @@ int dma_controller_dirty_page_logging_start(dma_controller_t *dma, size_t pgsize for (i = 0; i < dma->nregions; i++) { dma_memory_region_t *region = &dma->regions[i]; - ssize_t bitmap_size = get_bitmap_size(region->size, pgsize); + ssize_t bitmap_size; + + bitmap_size = get_bitmap_size(region->info.iova.iov_len, pgsize); + if (bitmap_size < 0) { return bitmap_size; } @@ -481,8 +483,8 @@ int dma_controller_dirty_page_logging_stop(dma_controller_t *dma) } int -dma_controller_dirty_page_get(dma_controller_t *dma, dma_addr_t addr, int len, - size_t pgsize, size_t size, char **data) +dma_controller_dirty_page_get(dma_controller_t *dma, vfu_dma_addr_t addr, + int len, size_t pgsize, size_t size, char **data) { int ret; ssize_t bitmap_size; diff --git a/lib/dma.h b/lib/dma.h index 0eb7ac3..226e758 100644 --- a/lib/dma.h +++ b/lib/dma.h @@ -51,8 +51,6 @@ * list (sglist) of dma_sg_t from DMA addresses. Then the sglist * can be mapped using dma_map_sg() into the process's virtual address space * as an iovec for direct access, and unmapped using dma_unmap_sg() when done. - * - dma_map_addr() and dma_unmap_addr() helper functions are provided - * for mapping DMA regions that can fit into one scatter-gather entry. * Every region is mapped into the application's virtual address space * at registration time with R/W permissions. * dma_map_sg() ignores all protection bits and only does lookups and @@ -79,18 +77,14 @@ #include "libvfio-user.h" #include "common.h" +#define iov_end(iov) ((iov)->iov_base + (iov)->iov_len) + struct vfu_ctx; typedef struct { - dma_addr_t dma_addr; // DMA address of this region - uint32_t prot; // memory protection of the mapping - // defined in sys/mman.h - - size_t size; // Size of this region + vfu_dma_info_t info; int fd; // File descriptor to mmap - int page_size; // Page size of this fd off_t offset; // File offset - void *virt_addr; // Virtual address of this region int refcnt; // Number of users of this region char *dirty_bitmap; // Dirty page bitmap } dma_memory_region_t; @@ -117,20 +111,20 @@ dma_controller_destroy(dma_controller_t *dma); * (e.g. due to conflict with existing region). */ MOCK_DECLARE(int, dma_controller_add_region, dma_controller_t *dma, - dma_addr_t dma_addr, size_t size, int fd, off_t offset, + vfu_dma_addr_t dma_addr, size_t size, int fd, off_t offset, uint32_t prot); -MOCK_DECLARE(void, _dma_controller_do_remove_region, dma_controller_t *dma, - dma_memory_region_t *region); - MOCK_DECLARE(int, dma_controller_remove_region, dma_controller_t *dma, - dma_addr_t dma_addr, size_t size, vfu_unmap_dma_cb_t *unmap_dma, - void *data); + vfu_dma_addr_t dma_addr, size_t size, + vfu_dma_unregister_cb_t *dma_unregister, void *data); + +MOCK_DECLARE(void, dma_controller_unmap_region, dma_controller_t *dma, + dma_memory_region_t *region); // Helper for dma_addr_to_sg() slow path. int _dma_addr_sg_split(const dma_controller_t *dma, - dma_addr_t dma_addr, uint32_t len, + vfu_dma_addr_t dma_addr, uint32_t len, dma_sg_t *sg, int max_sg, int prot); static bool @@ -142,9 +136,9 @@ _dma_should_mark_dirty(const dma_controller_t *dma, int prot) } static size_t -_get_pgstart(size_t pgsize, uint64_t base_addr, uint64_t offset) +_get_pgstart(size_t pgsize, void *base_addr, uint64_t offset) { - return (offset - base_addr) / pgsize; + return (offset - (uint64_t)base_addr) / pgsize; } static size_t @@ -164,7 +158,7 @@ _dma_bitmap_get_pgrange(const dma_controller_t *dma, assert(start != NULL); assert(end != NULL); - *start = _get_pgstart(dma->dirty_pgsize, region->dma_addr, sg->offset); + *start = _get_pgstart(dma->dirty_pgsize, region->info.iova.iov_base, sg->offset); *end = _get_pgend(dma->dirty_pgsize, sg->length, *start); } @@ -186,24 +180,24 @@ _dma_mark_dirty(const dma_controller_t *dma, const dma_memory_region_t *region, } static inline int -dma_init_sg(const dma_controller_t *dma, dma_sg_t *sg, dma_addr_t dma_addr, +dma_init_sg(const dma_controller_t *dma, dma_sg_t *sg, vfu_dma_addr_t dma_addr, uint32_t len, int prot, int region_index) { const dma_memory_region_t *const region = &dma->regions[region_index]; - if ((prot & PROT_WRITE) && !(region->prot & PROT_WRITE)) { + if ((prot & PROT_WRITE) && !(region->info.prot & PROT_WRITE)) { errno = EACCES; return -1; } - sg->dma_addr = region->dma_addr; + sg->dma_addr = region->info.iova.iov_base; sg->region = region_index; - sg->offset = dma_addr - region->dma_addr; + sg->offset = dma_addr - region->info.iova.iov_base; sg->length = len; if (_dma_should_mark_dirty(dma, prot)) { _dma_mark_dirty(dma, region, sg); } - sg->mappable = region->virt_addr != NULL; + sg->mappable = (region->info.vaddr != NULL); return 0; } @@ -223,18 +217,19 @@ dma_init_sg(const dma_controller_t *dma, dma_sg_t *sg, dma_addr_t dma_addr, */ static inline int dma_addr_to_sg(const dma_controller_t *dma, - dma_addr_t dma_addr, uint32_t len, + vfu_dma_addr_t dma_addr, size_t len, dma_sg_t *sg, int max_sg, int prot) { static __thread int region_hint; int cnt, ret; const dma_memory_region_t *const region = &dma->regions[region_hint]; - const dma_addr_t region_end = region->dma_addr + region->size; + const void *region_end = iov_end(®ion->info.iova); // Fast path: single region. if (likely(max_sg > 0 && len > 0 && - dma_addr >= region->dma_addr && dma_addr + len <= region_end && + dma_addr >= region->info.iova.iov_base && + dma_addr + len <= region_end && region_hint < dma->nregions)) { ret = dma_init_sg(dma, sg, dma_addr, len, prot, region_hint); if (ret < 0) { @@ -251,12 +246,6 @@ dma_addr_to_sg(const dma_controller_t *dma, return cnt; } -MOCK_DECLARE(void *, dma_map_region, dma_memory_region_t *region, int prot, - size_t offset, size_t len); - -int -dma_unmap_region(dma_memory_region_t *region, void *virt_addr, size_t len); - static inline int dma_map_sg(dma_controller_t *dma, const dma_sg_t *sg, struct iovec *iov, int cnt) @@ -273,12 +262,15 @@ dma_map_sg(dma_controller_t *dma, const dma_sg_t *sg, struct iovec *iov, return -EINVAL; } region = &dma->regions[sg[i].region]; - if (region->virt_addr == NULL) { + + if (region->info.vaddr == NULL) { return -EFAULT; } + vfu_log(dma->vfu_ctx, LOG_DEBUG, "map %#lx-%#lx\n", - sg->dma_addr + sg->offset, sg->dma_addr + sg->offset + sg->length); - iov[i].iov_base = region->virt_addr + sg[i].offset; + sg->dma_addr + sg->offset, + sg->dma_addr + sg->offset + sg->length); + iov[i].iov_base = region->info.vaddr + sg[i].offset; iov[i].iov_len = sg[i].length; region->refcnt++; } @@ -299,7 +291,8 @@ dma_unmap_sg(dma_controller_t *dma, const dma_sg_t *sg, * tfind(3) */ for (r = dma->regions; - r < dma->regions + dma->nregions && r->dma_addr != sg[i].dma_addr; + r < dma->regions + dma->nregions && + r->info.iova.iov_base != sg[i].dma_addr; r++); if (r > dma->regions + dma->nregions) { /* bad region */ @@ -312,39 +305,6 @@ dma_unmap_sg(dma_controller_t *dma, const dma_sg_t *sg, return; } -static inline void * -dma_map_addr(dma_controller_t *dma, dma_addr_t dma_addr, uint32_t len, int prot) -{ - dma_sg_t sg; - struct iovec iov; - - if (dma_addr_to_sg(dma, dma_addr, len, &sg, 1, prot) == 1 && - dma_map_sg(dma, &sg, &iov, 1) == 0) { - return iov.iov_base; - } - - return NULL; -} - -static inline void -dma_unmap_addr(dma_controller_t *dma, - dma_addr_t dma_addr, uint32_t len, void *addr) -{ - dma_sg_t sg; - struct iovec iov = { - .iov_base = addr, - .iov_len = len, - }; - int r; - - r = dma_addr_to_sg(dma, dma_addr, len, &sg, 1, PROT_NONE); - if (r != 1) { - assert(false); - } - - dma_unmap_sg(dma, &sg, &iov, 1); -} - int dma_controller_dirty_page_logging_start(dma_controller_t *dma, size_t pgsize); @@ -352,12 +312,8 @@ int dma_controller_dirty_page_logging_stop(dma_controller_t *dma); int -dma_controller_dirty_page_get(dma_controller_t *dma, dma_addr_t addr, int len, - size_t pgsize, size_t size, char **data); - -bool -dma_controller_region_valid(dma_controller_t *dma, dma_addr_t dma_addr, - size_t size); +dma_controller_dirty_page_get(dma_controller_t *dma, vfu_dma_addr_t addr, + int len, size_t pgsize, size_t size, char **data); #endif /* LIB_VFIO_USER_DMA_H */ diff --git a/lib/irq.c b/lib/irq.c index 3da1fb8..ecb6ce0 100644 --- a/lib/irq.c +++ b/lib/irq.c @@ -106,7 +106,7 @@ irqs_disable(vfu_ctx_t *vfu_ctx, uint32_t index, uint32_t start, uint32_t count) count = vfu_ctx->irq_count[index]; } - vfu_log(vfu_ctx, LOG_DEBUG, "disabling IRQ type %s range [%u-%u)", + vfu_log(vfu_ctx, LOG_DEBUG, "disabling IRQ type %s range [%u, %u)", vfio_irq_idx_to_str(index), start, start + count); switch (index) { @@ -335,7 +335,7 @@ handle_device_set_irqs(vfu_ctx_t *vfu_ctx, uint32_t size, irq_set->start, irq_set->count); } - vfu_log(vfu_ctx, LOG_DEBUG, "setting IRQ %s flags=%#x range [%u-%u)", + vfu_log(vfu_ctx, LOG_DEBUG, "setting IRQ %s flags=%#x range [%u, %u)", vfio_irq_idx_to_str(irq_set->index), irq_set->flags, irq_set->start, irq_set->start + irq_set->count); diff --git a/lib/libvfio-user.c b/lib/libvfio-user.c index 40a23b8..ebd70af 100644 --- a/lib/libvfio-user.c +++ b/lib/libvfio-user.c @@ -56,6 +56,7 @@ #include "private.h" #include "tran_sock.h" + void vfu_log(vfu_ctx_t *vfu_ctx, int level, const char *fmt, ...) { @@ -152,8 +153,8 @@ dev_get_caps(vfu_ctx_t *vfu_ctx, vfu_reg_info_t *vfu_reg, bool is_migr_reg, for (i = 0; i < nr_mmap_areas; i++) { struct iovec *iov = &vfu_reg->mmap_areas[i]; - vfu_log(vfu_ctx, LOG_DEBUG, "%s: area %d [%#llx-%#llx)", __func__, - i, iov->iov_base, iov->iov_base + iov->iov_len); + vfu_log(vfu_ctx, LOG_DEBUG, "%s: area %d [%#llx, %#llx)", __func__, + i, iov->iov_base, iov_end(iov)); (*fds)[i] = vfu_reg->fd; sparse->areas[i].offset = (uintptr_t)iov->iov_base; @@ -487,7 +488,7 @@ handle_dma_map_or_unmap(vfu_ctx_t *vfu_ctx, uint32_t size, bool map, size_t fdi; assert(vfu_ctx != NULL); - assert(fds != NULL); /* TODO assert valid only for map */ + assert(nr_fds == 0 || fds != NULL); if (vfu_ctx->dma == NULL) { return 0; @@ -504,61 +505,57 @@ handle_dma_map_or_unmap(vfu_ctx_t *vfu_ctx, uint32_t size, bool map, } for (i = 0, fdi = 0; i < nr_dma_regions; i++) { + struct vfio_user_dma_region *region = &dma_regions[i]; + char rstr[1024]; + + snprintf(rstr, sizeof(rstr), "[%#lx, %#lx) offset=%#lx " + "prot=%#x flags=%#x", region->addr, region->addr + region->size, + region->offset, region->prot, region->flags); + + vfu_log(vfu_ctx, LOG_DEBUG, "%s DMA region %s", + map ? "adding" : "removing", rstr); + if (map) { int fd = -1; - if (dma_regions[i].flags == VFIO_USER_F_DMA_REGION_MAPPABLE) { + if (region->flags == VFIO_USER_F_DMA_REGION_MAPPABLE) { fd = consume_fd(fds, nr_fds, fdi++); if (fd < 0) { + vfu_log(vfu_ctx, LOG_ERR, "failed to add DMA region %s: " + "mappable but fd not provided", rstr); ret = fd; break; } } - ret = dma_controller_add_region(vfu_ctx->dma, - dma_regions[i].addr, - dma_regions[i].size, - fd, - dma_regions[i].offset, - dma_regions[i].prot); + ret = dma_controller_add_region(vfu_ctx->dma, (void *)region->addr, + region->size, fd, region->offset, + region->prot); if (ret < 0) { if (fd != -1) { close(fd); } - vfu_log(vfu_ctx, LOG_INFO, - "failed to add DMA region %#lx-%#lx offset=%#lx fd=%d: %s", - dma_regions[i].addr, - dma_regions[i].addr + dma_regions[i].size - 1, - dma_regions[i].offset, fd, - strerror(-ret)); + vfu_log(vfu_ctx, LOG_ERR, "failed to add DMA region %s: %s", + rstr, strerror(-ret)); break; } - if (vfu_ctx->map_dma != NULL) { - vfu_ctx->map_dma(vfu_ctx, dma_regions[i].addr, - dma_regions[i].size, dma_regions[i].prot); + + if (vfu_ctx->dma_register != NULL) { + vfu_ctx->dma_register(vfu_ctx, + &vfu_ctx->dma->regions[ret].info); } + ret = 0; - vfu_log(vfu_ctx, LOG_DEBUG, - "added DMA region %#lx-%#lx offset=%#lx fd=%d prot=%#x", - dma_regions[i].addr, - dma_regions[i].addr + dma_regions[i].size - 1, - dma_regions[i].offset, fd, dma_regions[i].prot); } else { ret = dma_controller_remove_region(vfu_ctx->dma, - dma_regions[i].addr, - dma_regions[i].size, - vfu_ctx->unmap_dma, vfu_ctx); + (void *)region->addr, + region->size, + vfu_ctx->dma_unregister, + vfu_ctx); if (ret < 0) { - vfu_log(vfu_ctx, LOG_INFO, - "failed to remove DMA region %#lx-%#lx: %s", - dma_regions[i].addr, - dma_regions[i].addr + dma_regions[i].size - 1, - strerror(-ret)); + vfu_log(vfu_ctx, LOG_ERR, "failed to remove DMA region %s: %s", + rstr, strerror(-ret)); break; } - vfu_log(vfu_ctx, LOG_DEBUG, - "removed DMA region %#lx-%#lx", - dma_regions[i].addr, - dma_regions[i].addr + dma_regions[i].size - 1); } } return ret; @@ -599,8 +596,10 @@ handle_dirty_pages_get(vfu_ctx_t *vfu_ctx, for (i = 1; i < *nr_iovecs; i++) { struct vfio_iommu_type1_dirty_bitmap_get *r = &ranges[(i - 1)]; /* FIXME ugly indexing */ - ret = dma_controller_dirty_page_get(vfu_ctx->dma, r->iova, r->size, - r->bitmap.pgsize, r->bitmap.size, + ret = dma_controller_dirty_page_get(vfu_ctx->dma, + (vfu_dma_addr_t)r->iova, + r->size, r->bitmap.pgsize, + r->bitmap.size, (char**)&((*iovecs)[i].iov_base)); if (ret != 0) { goto out; @@ -1327,7 +1326,7 @@ vfu_setup_region(vfu_ctx_t *vfu_ctx, int region_idx, size_t size, for (i = 0; i < nr_mmap_areas; i++) { struct iovec *iov = &mmap_areas[i]; - if ((size_t)iov->iov_base + iov->iov_len > size) { + if ((size_t)iov_end(iov) > size) { return ERROR_INT(EINVAL); } } @@ -1385,8 +1384,8 @@ vfu_setup_device_reset_cb(vfu_ctx_t *vfu_ctx, vfu_reset_cb_t *reset) } int -vfu_setup_device_dma_cb(vfu_ctx_t *vfu_ctx, vfu_map_dma_cb_t *map_dma, - vfu_unmap_dma_cb_t *unmap_dma) +vfu_setup_device_dma(vfu_ctx_t *vfu_ctx, vfu_dma_register_cb_t *dma_register, + vfu_dma_unregister_cb_t *dma_unregister) { assert(vfu_ctx != NULL); @@ -1397,8 +1396,8 @@ vfu_setup_device_dma_cb(vfu_ctx_t *vfu_ctx, vfu_map_dma_cb_t *map_dma, return ERROR_INT(ENOMEM); } - vfu_ctx->map_dma = map_dma; - vfu_ctx->unmap_dma = unmap_dma; + vfu_ctx->dma_register = dma_register; + vfu_ctx->dma_unregister = dma_unregister; return 0; } @@ -1450,33 +1449,33 @@ vfu_setup_device_migration_callbacks(vfu_ctx_t *vfu_ctx, return 0; } -inline vfu_reg_info_t * +vfu_reg_info_t * vfu_get_region_info(vfu_ctx_t *vfu_ctx) { assert(vfu_ctx != NULL); return vfu_ctx->reg_info; } -inline int -vfu_addr_to_sg(vfu_ctx_t *vfu_ctx, dma_addr_t dma_addr, - uint32_t len, dma_sg_t *sg, int max_sg, int prot) +int +vfu_addr_to_sg(vfu_ctx_t *vfu_ctx, vfu_dma_addr_t dma_addr, + size_t len, dma_sg_t *sg, int max_sg, int prot) { assert(vfu_ctx != NULL); - if (unlikely(vfu_ctx->unmap_dma == NULL)) { + if (unlikely(vfu_ctx->dma == NULL)) { return ERROR_INT(EINVAL); } return dma_addr_to_sg(vfu_ctx->dma, dma_addr, len, sg, max_sg, prot); } -inline int +int vfu_map_sg(vfu_ctx_t *vfu_ctx, const dma_sg_t *sg, struct iovec *iov, int cnt) { int ret; - if (unlikely(vfu_ctx->unmap_dma == NULL)) { + if (unlikely(vfu_ctx->dma_unregister == NULL)) { return ERROR_INT(EINVAL); } @@ -1488,10 +1487,10 @@ vfu_map_sg(vfu_ctx_t *vfu_ctx, const dma_sg_t *sg, return 0; } -inline void +void vfu_unmap_sg(vfu_ctx_t *vfu_ctx, const dma_sg_t *sg, struct iovec *iov, int cnt) { - if (unlikely(vfu_ctx->unmap_dma == NULL)) { + if (unlikely(vfu_ctx->dma_unregister == NULL)) { return; } return dma_unmap_sg(vfu_ctx->dma, sg, iov, cnt); @@ -1515,7 +1514,7 @@ vfu_dma_read(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, void *data) return ERROR_INT(ENOMEM); } - dma_send.addr = sg->dma_addr; + dma_send.addr = (uint64_t)sg->dma_addr; dma_send.count = sg->length; ret = vfu_ctx->tran->send_msg(vfu_ctx, msg_id, VFIO_USER_DMA_READ, &dma_send, sizeof(dma_send), NULL, @@ -1540,7 +1539,7 @@ vfu_dma_write(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, void *data) if (dma_send == NULL) { return ERROR_INT(ENOMEM); } - dma_send->addr = sg->dma_addr; + dma_send->addr = (uint64_t)sg->dma_addr; dma_send->count = sg->length; memcpy(dma_send->data, data, sg->length); /* FIXME no need to copy! */ ret = vfu_ctx->tran->send_msg(vfu_ctx, msg_id, VFIO_USER_DMA_WRITE, diff --git a/lib/private.h b/lib/private.h index c463fea..49f9152 100644 --- a/lib/private.h +++ b/lib/private.h @@ -122,8 +122,8 @@ struct vfu_ctx { void *tran_data; uint64_t flags; char *uuid; - vfu_map_dma_cb_t *map_dma; - vfu_unmap_dma_cb_t *unmap_dma; + vfu_dma_register_cb_t *dma_register; + vfu_dma_unregister_cb_t *dma_unregister; int client_max_fds; @@ -139,26 +139,26 @@ struct vfu_ctx { void dump_buffer(const char *prefix, const char *buf, uint32_t count); +int +consume_fd(int *fds, size_t nr_fds, size_t index); + vfu_reg_info_t * vfu_get_region_info(vfu_ctx_t *vfu_ctx); +long +dev_get_reginfo(vfu_ctx_t *vfu_ctx, uint32_t index, uint32_t argsz, + struct vfio_region_info **vfio_reg, int **fds, size_t *nr_fds); + int handle_dma_map_or_unmap(vfu_ctx_t *vfu_ctx, uint32_t size, bool map, int *fds, size_t nr_fds, struct vfio_user_dma_region *dma_regions); int -consume_fd(int *fds, size_t nr_fds, size_t index); - -int handle_device_get_info(vfu_ctx_t *vfu_ctx, uint32_t size, struct vfio_device_info *in_dev_info, struct vfio_device_info *out_dev_info); -long -dev_get_reginfo(vfu_ctx_t *vfu_ctx, uint32_t index, uint32_t argsz, - struct vfio_region_info **vfio_reg, int **fds, size_t *nr_fds); - int handle_device_set_irqs(vfu_ctx_t *vfu_ctx, uint32_t size, int *fds, size_t nr_fds, struct vfio_irq_set *irq_set); @@ -181,6 +181,24 @@ MOCK_DECLARE(int, handle_dirty_pages, vfu_ctx_t *vfu_ctx, uint32_t size, struct iovec **iovecs, size_t *nr_iovecs, struct vfio_iommu_type1_dirty_bitmap *dirty_bitmap); +MOCK_DECLARE(bool, should_exec_command, vfu_ctx_t *vfu_ctx, uint16_t cmd); + +MOCK_DECLARE(bool, cmd_allowed_when_stopped_and_copying, uint16_t cmd); + +MOCK_DECLARE(int, get_next_command, vfu_ctx_t *vfu_ctx, + struct vfio_user_header *hdr, int *fds, size_t *nr_fds); + +MOCK_DECLARE(int, exec_command, vfu_ctx_t *vfu_ctx, + struct vfio_user_header *hdr, size_t size, int *fds, size_t nr_fds, + int **fds_out, size_t *nr_fds_out, struct iovec *_iovecs, + struct iovec **iovecs, size_t *nr_iovecs, bool *free_iovec_data); + +MOCK_DECLARE(int, process_request, vfu_ctx_t *vfu_ctx); + +MOCK_DECLARE(int, handle_dirty_pages, vfu_ctx_t *vfu_ctx, uint32_t size, + struct iovec **iovecs, size_t *nr_iovecs, + struct vfio_iommu_type1_dirty_bitmap *dirty_bitmap); + #endif /* LIB_VFIO_USER_PRIVATE_H */ /* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/samples/gpio-pci-idio-16.c b/samples/gpio-pci-idio-16.c index 26ac62a..1efc291 100644 --- a/samples/gpio-pci-idio-16.c +++ b/samples/gpio-pci-idio-16.c @@ -126,13 +126,12 @@ migration_write_data(UNUSED vfu_ctx_t *vfu_ctx, void *buf, __u64 size, } static void -map_dma(vfu_ctx_t *vfu_ctx UNUSED, uint64_t iova UNUSED, uint64_t len UNUSED, - uint32_t prot UNUSED) +dma_register(UNUSED vfu_ctx_t *vfu_ctx, UNUSED vfu_dma_info_t *info) { } static int -unmap_dma(vfu_ctx_t *vfu_ctx UNUSED, uint64_t iova UNUSED, uint64_t len UNUSED) +dma_unregister(UNUSED vfu_ctx_t *vfu_ctx, UNUSED vfu_dma_info_t *info) { return 0; } @@ -142,7 +141,6 @@ main(int argc, char *argv[]) { int ret; bool verbose = false; - bool dma = false; char opt; struct sigaction act = { .sa_handler = _sa_handler }; vfu_ctx_t *vfu_ctx; @@ -159,16 +157,13 @@ main(int argc, char *argv[]) .write_data = &migration_write_data }; - while ((opt = getopt(argc, argv, "vd")) != -1) { + while ((opt = getopt(argc, argv, "v")) != -1) { switch (opt) { case 'v': verbose = true; break; - case 'd': - dma = true; - break; default: /* '?' */ - fprintf(stderr, "Usage: %s [-d] [-v] \n", argv[0]); + fprintf(stderr, "Usage: %s [-v] \n", argv[0]); exit(EXIT_FAILURE); } } @@ -227,11 +222,9 @@ main(int argc, char *argv[]) err(EXIT_FAILURE, "failed to setup irq counts"); } - if (dma) { - ret = vfu_setup_device_dma_cb(vfu_ctx, map_dma, unmap_dma); - if (ret < 0) { - err(EXIT_FAILURE, "failed to setup DMA"); - } + ret = vfu_setup_device_dma(vfu_ctx, dma_register, dma_unregister); + if (ret < 0) { + err(EXIT_FAILURE, "failed to setup DMA"); } ret = vfu_realize_ctx(vfu_ctx); diff --git a/samples/server.c b/samples/server.c index d5318a3..5130688 100644 --- a/samples/server.c +++ b/samples/server.c @@ -48,8 +48,7 @@ #include "tran_sock.h" struct dma_regions { - uint64_t addr; - uint64_t len; + struct iovec iova; uint32_t prot; }; @@ -152,36 +151,35 @@ static void _sa_handler(int signum) } static void -map_dma(vfu_ctx_t *vfu_ctx, uint64_t iova, uint64_t len, uint32_t prot) +dma_register(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info) { struct server_data *server_data = vfu_get_private(vfu_ctx); int idx; for (idx = 0; idx < NR_DMA_REGIONS; idx++) { - if (server_data->regions[idx].addr == 0 && - server_data->regions[idx].len == 0) + if (server_data->regions[idx].iova.iov_base == NULL && + server_data->regions[idx].iova.iov_len == 0) break; } if (idx >= NR_DMA_REGIONS) { errx(EXIT_FAILURE, "Failed to add dma region, slots full\n"); } - server_data->regions[idx].addr = iova; - server_data->regions[idx].len = len; - server_data->regions[idx].prot = prot; + server_data->regions[idx].iova = info->iova; + server_data->regions[idx].prot = info->prot; } static int -unmap_dma(vfu_ctx_t *vfu_ctx, uint64_t iova, uint64_t len) +dma_unregister(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info) { struct server_data *server_data = vfu_get_private(vfu_ctx); int idx; for (idx = 0; idx < NR_DMA_REGIONS; idx++) { - if (server_data->regions[idx].addr == iova && - server_data->regions[idx].len == len) { - server_data->regions[idx].addr = 0; - server_data->regions[idx].len = 0; + if (server_data->regions[idx].iova.iov_len == info->iova.iov_len && + server_data->regions[idx].iova.iov_base == info->iova.iov_base) { + server_data->regions[idx].iova.iov_base = NULL; + server_data->regions[idx].iova.iov_len = 0; return 0; } } @@ -217,26 +215,28 @@ static void do_dma_io(vfu_ctx_t *vfu_ctx, struct server_data *server_data) assert(vfu_ctx != NULL); - ret = vfu_addr_to_sg(vfu_ctx, server_data->regions[0].addr, count, &sg, - 1, PROT_WRITE); + ret = vfu_addr_to_sg(vfu_ctx, + (vfu_dma_addr_t)server_data->regions[0].iova.iov_base, + count, &sg, 1, PROT_WRITE); if (ret < 0) { - errx(EXIT_FAILURE, "failed to map %#lx-%#lx: %s\n", - server_data->regions[0].addr, - server_data->regions[0].addr + count -1, strerror(-ret)); + errx(EXIT_FAILURE, "failed to map %p-%p: %s\n", + server_data->regions[0].iova.iov_base, + server_data->regions[0].iova.iov_base + count -1, + strerror(-ret)); } memset(buf, 'A', count); get_md5sum(buf, count, md5sum1); - printf("%s: WRITE addr %#lx count %d\n", __func__, - server_data->regions[0].addr, count); + printf("%s: WRITE addr %p count %d\n", __func__, + server_data->regions[0].iova.iov_base, count); ret = vfu_dma_write(vfu_ctx, &sg, buf); if (ret < 0) { errx(EXIT_FAILURE, "vfu_dma_write failed: %s\n", strerror(-ret)); } memset(buf, 0, count); - printf("%s: READ addr %#lx count %d\n", __func__, - server_data->regions[0].addr, count); + printf("%s: READ addr %p count %d\n", __func__, + server_data->regions[0].iova.iov_base, count); ret = vfu_dma_read(vfu_ctx, &sg, buf); if (ret < 0) { errx(EXIT_FAILURE, "vfu_dma_read failed: %s\n", strerror(-ret)); @@ -518,7 +518,7 @@ int main(int argc, char *argv[]) err(EXIT_FAILURE, "failed to setup device reset callbacks"); } - ret = vfu_setup_device_dma_cb(vfu_ctx, &map_dma, &unmap_dma); + ret = vfu_setup_device_dma(vfu_ctx, &dma_register, &dma_unregister); if (ret < 0) { err(EXIT_FAILURE, "failed to setup device DMA callbacks"); } diff --git a/test/mocks.c b/test/mocks.c index 3a83ed5..b01011e 100644 --- a/test/mocks.c +++ b/test/mocks.c @@ -50,16 +50,16 @@ struct function const char *name; bool patched; }; -; + static int (*__real_close)(int); static struct function funcs[] = { /* mocked internal funcs */ - { .name = "_dma_controller_do_remove_region" }, { .name = "cmd_allowed_when_stopped_and_copying" }, { .name = "device_is_stopped_and_copying" }, { .name = "device_is_stopped" }, { .name = "dma_controller_add_region" }, + { .name = "dma_controller_unmap_region" }, { .name = "dma_controller_remove_region" }, { .name = "dma_map_region" }, { .name = "exec_command" }, @@ -110,7 +110,7 @@ unpatch_all(void) } int -dma_controller_add_region(dma_controller_t *dma, dma_addr_t dma_addr, +dma_controller_add_region(dma_controller_t *dma, void *dma_addr, size_t size, int fd, off_t offset, uint32_t prot) { @@ -130,36 +130,26 @@ dma_controller_add_region(dma_controller_t *dma, dma_addr_t dma_addr, int dma_controller_remove_region(dma_controller_t *dma, - dma_addr_t dma_addr, size_t size, - vfu_unmap_dma_cb_t *unmap_dma, void *data) + void *dma_addr, size_t size, + vfu_dma_unregister_cb_t *dma_unregister, + void *data) { if (!is_patched("dma_controller_remove_region")) { return __real_dma_controller_remove_region(dma, dma_addr, size, - unmap_dma, data); + dma_unregister, data); } check_expected(dma); check_expected(dma_addr); check_expected(size); - check_expected(unmap_dma); + check_expected(dma_unregister); check_expected(data); return mock(); } -void * -dma_map_region(dma_memory_region_t *region, int prot, size_t offset, - size_t len) -{ - check_expected_ptr(region); - check_expected(prot); - check_expected(offset); - check_expected(len); - return mock_ptr_type(void*); -} - void -_dma_controller_do_remove_region(dma_controller_t *dma, - dma_memory_region_t *region) +dma_controller_unmap_region(dma_controller_t *dma, + dma_memory_region_t *region) { check_expected(dma); check_expected(region); @@ -291,12 +281,18 @@ handle_dirty_pages(vfu_ctx_t *vfu_ctx, uint32_t size, } /* Always mocked. */ +void +mock_dma_register(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info) +{ + check_expected(vfu_ctx); + check_expected(info); +} + int -mock_unmap_dma(vfu_ctx_t *vfu_ctx, uint64_t iova, uint64_t len) +mock_dma_unregister(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info) { check_expected(vfu_ctx); - check_expected(iova); - check_expected(len); + check_expected(info); return mock(); } diff --git a/test/mocks.h b/test/mocks.h index 4ffe047..496cc54 100644 --- a/test/mocks.h +++ b/test/mocks.h @@ -28,16 +28,14 @@ * */ -#include -#include - #include "private.h" -void patch(const char *func); - void unpatch_all(void); -int -mock_unmap_dma(vfu_ctx_t *vfu_ctx, uint64_t iova, uint64_t len); +void patch(const char *name); + +void mock_dma_register(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info); + +int mock_dma_unregister(vfu_ctx_t *vfu_ctx, vfu_dma_info_t *info); /* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/test/unit-tests.c b/test/unit-tests.c index c593b09..3b1d227 100644 --- a/test/unit-tests.c +++ b/test/unit-tests.c @@ -104,20 +104,20 @@ test_dma_map_without_fd(void **state UNUSED) assert_int_equal(0, handle_dma_map_or_unmap(&vfu_ctx, size, true, &fd, 0, &r)); } -static void -dma_map_cb(vfu_ctx_t *vfu_ctx, uint64_t iova, uint64_t len, - uint32_t prot) +static int +check_dma_info(const LargestIntegralType value, + const LargestIntegralType cvalue) { - if (iova == 0xcafebabe) { - assert_int_equal(0x1000, len); - assert_int_equal(PROT_READ | PROT_WRITE, prot); - } else { - assert_int_equal(0xdeadbeef, iova); - assert_int_equal(0x1000, len); - assert_int_equal(PROT_NONE, prot); - } - (*((size_t*)vfu_ctx->pvt))++; - return; + vfu_dma_info_t *info = (vfu_dma_info_t *)value; + vfu_dma_info_t *cinfo = (vfu_dma_info_t *)cvalue; + + return info->iova.iov_base == cinfo->iova.iov_base && + info->iova.iov_len == cinfo->iova.iov_len && + info->vaddr == cinfo->vaddr && + info->mapping.iov_base == cinfo->mapping.iov_base && + info->mapping.iov_len == cinfo->mapping.iov_len && + info->page_size == cinfo->page_size && + info->prot == cinfo->prot; } /* @@ -131,10 +131,11 @@ dma_map_cb(vfu_ctx_t *vfu_ctx, uint64_t iova, uint64_t len, static void test_dma_add_regions_mixed(void **state UNUSED) { - dma_controller_t dma = { 0 }; + dma_controller_t *dma = alloca(sizeof(*dma) + sizeof(dma_memory_region_t) * 2); size_t count = 0; - vfu_ctx_t vfu_ctx = { .dma = &dma , .map_dma = dma_map_cb, .pvt = &count}; - dma.vfu_ctx = &vfu_ctx; + vfu_ctx_t vfu_ctx = { .dma = dma , .dma_register = mock_dma_register, + .pvt = &count }; + dma->vfu_ctx = &vfu_ctx; struct vfio_user_dma_region r[2] = { [0] = { .addr = 0xdeadbeef, @@ -147,11 +148,19 @@ test_dma_add_regions_mixed(void **state UNUSED) .size = 0x1000, .offset = 0x1000, .flags = VFIO_USER_F_DMA_REGION_MAPPABLE, - .prot = PROT_READ|PROT_WRITE + .prot = PROT_READ | PROT_WRITE } }; int fd = 0x0badf00d; + memset(dma, 0, sizeof(*dma) + sizeof(dma_memory_region_t) * 2); + dma->nregions = 2; + dma->regions[0].info.mapping.iov_base = (void *)0x123456789; + dma->regions[0].info.prot = r[0].prot; + dma->regions[1].info.mapping.iov_base = (void *)0x987654321; + dma->regions[1].info.vaddr = (void *)0x987654321; + dma->regions[1].info.prot = r[1].prot; + patch("dma_controller_add_region"); /* 1st region */ will_return(dma_controller_add_region, 0); @@ -161,17 +170,22 @@ test_dma_add_regions_mixed(void **state UNUSED) expect_value(dma_controller_add_region, fd, -1); expect_value(dma_controller_add_region, offset, r[0].offset); expect_value(dma_controller_add_region, prot, r[0].prot); + expect_value(mock_dma_register, vfu_ctx, &vfu_ctx); + expect_check(mock_dma_register, info, check_dma_info, + &dma->regions[0].info); /* 2nd region */ - will_return(dma_controller_add_region, 0); + will_return(dma_controller_add_region, 1); expect_value(dma_controller_add_region, dma, vfu_ctx.dma); expect_value(dma_controller_add_region, dma_addr, r[1].addr); expect_value(dma_controller_add_region, size, r[1].size); expect_value(dma_controller_add_region, fd, fd); expect_value(dma_controller_add_region, offset, r[1].offset); expect_value(dma_controller_add_region, prot, r[1].prot); + expect_value(mock_dma_register, vfu_ctx, &vfu_ctx); + expect_check(mock_dma_register, info, check_dma_info, + &dma->regions[1].info); assert_int_equal(0, handle_dma_map_or_unmap(&vfu_ctx, sizeof(r), true, &fd, 1, r)); - assert_int_equal(2, count); } /* @@ -274,39 +288,49 @@ test_dma_map_return_value(void **state UNUSED) } /* - * Tests that handle_dma_map_or_unmap calls dma_controller_remove_region. + * Tests that handle_dma_map_or_unmap correctly removes a region. */ static void test_handle_dma_unmap(void **state UNUSED) { - dma_controller_t d = { 0 }; + size_t size = sizeof(dma_controller_t) * sizeof(dma_memory_region_t) * 3; + dma_controller_t *d = alloca(size); vfu_ctx_t v = { - .dma = &d, - .map_dma = (void *)0xcafebabe, - .unmap_dma = (void *)0x8badf00d + .dma = d, }; - struct vfio_user_dma_region r[2] = { - { .addr = 0xabcd, .size = 0x1234 }, - { .addr = 0xbcda, .size = 0x4321 } + struct vfio_user_dma_region r = { + .addr = 0x1000, .size = 0x1000 }; + int ret; - patch("dma_controller_add_region"); - patch("dma_controller_remove_region"); - expect_value(dma_controller_remove_region, dma, &d); - expect_value(dma_controller_remove_region, dma_addr, 0xabcd); - expect_value(dma_controller_remove_region, size, 0x1234); - expect_value(dma_controller_remove_region, unmap_dma, 0x8badf00d); - expect_value(dma_controller_remove_region, data, &v); - will_return(dma_controller_remove_region, 0); - expect_value(dma_controller_remove_region, dma, &d); - expect_value(dma_controller_remove_region, dma_addr, 0xbcda); - expect_value(dma_controller_remove_region, size, 0x4321); - expect_value(dma_controller_remove_region, unmap_dma, 0x8badf00d); - expect_value(dma_controller_remove_region, data, &v); - will_return(dma_controller_remove_region, 0); + memset(d, 0, size); - assert_int_equal(0, - handle_dma_map_or_unmap(&v, sizeof(r), false, (void *)0xdeadbeef, 0, r)); + d->nregions = 3; + d->regions[0].info.iova.iov_base = (void *)0x1000; + d->regions[0].info.iova.iov_len = 0x1000; + d->regions[0].fd = -1; + d->regions[1].info.iova.iov_base = (void *)0x4000; + d->regions[1].info.iova.iov_len = 0x2000; + d->regions[1].fd = -1; + d->regions[2].info.iova.iov_base = (void *)0x8000; + d->regions[2].info.iova.iov_len = 0x3000; + d->regions[2].fd = -1; + + v.dma_unregister = mock_dma_unregister; + + expect_value(mock_dma_unregister, vfu_ctx, &v); + expect_check(mock_dma_unregister, info, check_dma_info, + &d->regions[0].info); + will_return(mock_dma_unregister, 0); + + ret = handle_dma_map_or_unmap(&v, sizeof(r), false, NULL, 0, &r); + + assert_int_equal(0, ret); + assert_int_equal(2, d->nregions); + assert_int_equal(0x4000, d->regions[0].info.iova.iov_base); + assert_int_equal(0x2000, d->regions[0].info.iova.iov_len); + assert_int_equal(0x8000, d->regions[1].info.iova.iov_base); + assert_int_equal(0x3000, d->regions[1].info.iova.iov_len); } static void @@ -314,7 +338,7 @@ test_dma_controller_add_region_no_fd(void **state UNUSED) { vfu_ctx_t vfu_ctx = { 0 }; dma_controller_t *dma = alloca(sizeof(*dma) + sizeof(dma_memory_region_t)); - dma_addr_t dma_addr = 0xdeadbeef; + void *dma_addr = (void *)0xdeadbeef; size_t size = 0; int fd = -1; off_t offset = 0; @@ -325,18 +349,22 @@ test_dma_controller_add_region_no_fd(void **state UNUSED) dma->vfu_ctx = &vfu_ctx; dma->max_regions = 1; - assert_int_equal(0, dma_controller_add_region(dma, dma_addr, size, fd, - offset, PROT_NONE)); + assert_int_equal(0, + dma_controller_add_region(dma, dma_addr, size, fd, + offset, PROT_NONE)); + assert_int_equal(1, dma->nregions); r = &dma->regions[0]; - assert_ptr_equal(NULL, r->virt_addr); - assert_ptr_equal(dma_addr, r->dma_addr); - assert_int_equal(size, r->size); - assert_int_equal(0x1000, r->page_size); + assert_ptr_equal(NULL, r->info.vaddr); + assert_ptr_equal(NULL, r->info.mapping.iov_base); + assert_int_equal(0, r->info.mapping.iov_len); + assert_ptr_equal(dma_addr, r->info.iova.iov_base); + assert_int_equal(size, r->info.iova.iov_len); + assert_int_equal(0x1000, r->info.page_size); assert_int_equal(offset, r->offset); assert_int_equal(fd, r->fd); assert_int_equal(0, r->refcnt); - assert_int_equal(PROT_NONE, r->prot); + assert_int_equal(PROT_NONE, r->info.prot); } static void @@ -349,19 +377,22 @@ test_dma_controller_remove_region_mapped(void **state UNUSED) d->vfu_ctx = &v; d->max_regions = d->nregions = 1; - d->regions[0].dma_addr = 0xdeadbeef; - d->regions[0].size = 0x100; - d->regions[0].virt_addr = (void *)0xcafebabe; - expect_value(mock_unmap_dma, vfu_ctx, &v); - expect_value(mock_unmap_dma, iova, 0xdeadbeef); - expect_value(mock_unmap_dma, len, 0x100); - /* FIXME add uni test when unmap_dma fails */ - will_return(mock_unmap_dma, 0); - patch("_dma_controller_do_remove_region"); - expect_value(_dma_controller_do_remove_region, dma, d); - expect_value(_dma_controller_do_remove_region, region, &d->regions[0]); + d->regions[0].info.iova.iov_base = (void *)0xdeadbeef; + d->regions[0].info.iova.iov_len = 0x100; + d->regions[0].info.mapping.iov_base = (void *)0xcafebabe; + d->regions[0].info.mapping.iov_len = 0x1000; + d->regions[0].info.vaddr = (void *)0xcafebabe; + expect_value(mock_dma_unregister, vfu_ctx, &v); + expect_check(mock_dma_unregister, info, check_dma_info, + &d->regions[0].info); + /* FIXME add unit test when dma_unregister fails */ + will_return(mock_dma_unregister, 0); + patch("dma_controller_unmap_region"); + expect_value(dma_controller_unmap_region, dma, d); + expect_value(dma_controller_unmap_region, region, &d->regions[0]); assert_int_equal(0, - dma_controller_remove_region(d, 0xdeadbeef, 0x100, mock_unmap_dma, &v)); + dma_controller_remove_region(d, (void *)0xdeadbeef, 0x100, + mock_dma_unregister, &v)); } static void @@ -374,16 +405,17 @@ test_dma_controller_remove_region_unmapped(void **state UNUSED) d->vfu_ctx = &v; d->max_regions = d->nregions = 1; - d->regions[0].dma_addr = 0xdeadbeef; - d->regions[0].size = 0x100; - d->regions[0].virt_addr = NULL; - expect_value(mock_unmap_dma, vfu_ctx, &v); - expect_value(mock_unmap_dma, iova, 0xdeadbeef); - expect_value(mock_unmap_dma, len, 0x100); - will_return(mock_unmap_dma, 0); - patch("_dma_controller_do_remove_region"); + d->regions[0].info.iova.iov_base = (void *)0xdeadbeef; + d->regions[0].info.iova.iov_len = 0x100; + d->regions[0].fd = -1; + expect_value(mock_dma_unregister, vfu_ctx, &v); + expect_check(mock_dma_unregister, info, check_dma_info, + &d->regions[0].info); + will_return(mock_dma_unregister, 0); + patch("dma_controller_unmap_region"); assert_int_equal(0, - dma_controller_remove_region(d, 0xdeadbeef, 0x100, mock_unmap_dma, &v)); + dma_controller_remove_region(d, (void *)0xdeadbeef, 0x100, + mock_dma_unregister, &v)); } static int fds[] = { 0xab, 0xcd }; @@ -1229,7 +1261,7 @@ test_dma_map_sg(void **state UNUSED) assert_int_equal(-EFAULT, dma_map_sg(dma, &sg, &iovec, 1)); /* w/ fd */ - dma->regions[0].virt_addr = (void*)0xdead0000; + dma->regions[0].info.vaddr = (void *)0xdead0000; sg.offset = 0x0000beef; sg.length = 0xcafebabe; assert_int_equal(0, dma_map_sg(dma, &sg, &iovec, 1)); @@ -1247,44 +1279,44 @@ test_dma_addr_to_sg(void **state UNUSED) dma->nregions = 1; r = &dma->regions[0]; - r->dma_addr = 0x1000; - r->size = 0x4000; - r->virt_addr = (void*)0xdeadbeef; + r->info.iova.iov_base = (void *)0x1000; + r->info.iova.iov_len = 0x4000; + r->info.vaddr = (void *)0xdeadbeef; /* fast path, region hint hit */ - r->prot = PROT_WRITE; + r->info.prot = PROT_WRITE; assert_int_equal(1, - dma_addr_to_sg(dma, 0x2000, 0x400, &sg, 1, PROT_READ)); - assert_int_equal(r->dma_addr, sg.dma_addr); + dma_addr_to_sg(dma, (vfu_dma_addr_t)0x2000, 0x400, &sg, 1, PROT_READ)); + assert_int_equal(r->info.iova.iov_base, sg.dma_addr); assert_int_equal(0, sg.region); - assert_int_equal(0x2000 - r->dma_addr, sg.offset); + assert_int_equal(0x2000 - (unsigned long long)r->info.iova.iov_base, sg.offset); assert_int_equal(0x400, sg.length); assert_true(sg.mappable); errno = 0; - r->prot = PROT_WRITE; + r->info.prot = PROT_WRITE; assert_int_equal(-1, - dma_addr_to_sg(dma, 0x6000, 0x400, &sg, 1, PROT_READ)); + dma_addr_to_sg(dma, (vfu_dma_addr_t)0x6000, 0x400, &sg, 1, PROT_READ)); assert_int_equal(ENOENT, errno); - r->prot = PROT_READ; + r->info.prot = PROT_READ; assert_int_equal(-1, - dma_addr_to_sg(dma, 0x2000, 0x400, &sg, 1, PROT_WRITE)); + dma_addr_to_sg(dma, (vfu_dma_addr_t)0x2000, 0x400, &sg, 1, PROT_WRITE)); assert_int_equal(EACCES, errno); - r->prot = PROT_READ|PROT_WRITE; + r->info.prot = PROT_READ|PROT_WRITE; assert_int_equal(1, - dma_addr_to_sg(dma, 0x2000, 0x400, &sg, 1, PROT_READ)); + dma_addr_to_sg(dma, (vfu_dma_addr_t)0x2000, 0x400, &sg, 1, PROT_READ)); /* TODO test more scenarios */ } static void -test_vfu_setup_device_dma_cb(void **state UNUSED) +test_vfu_setup_device_dma(void **state UNUSED) { vfu_ctx_t vfu_ctx = { 0 }; - assert_int_equal(0, vfu_setup_device_dma_cb(&vfu_ctx, NULL, NULL)); + assert_int_equal(0, vfu_setup_device_dma(&vfu_ctx, NULL, NULL)); assert_non_null(vfu_ctx.dma); free(vfu_ctx.dma); } @@ -1876,7 +1908,7 @@ main(void) cmocka_unit_test_setup(test_dma_map_return_value, setup), cmocka_unit_test_setup(test_dma_map_sg, setup), cmocka_unit_test_setup(test_dma_addr_to_sg, setup), - cmocka_unit_test_setup(test_vfu_setup_device_dma_cb, setup), + cmocka_unit_test_setup(test_vfu_setup_device_dma, setup), cmocka_unit_test_setup(test_migration_state_transitions, setup), cmocka_unit_test_setup_teardown(test_setup_migration_region_too_small, setup_test_setup_migration_region, -- cgit v1.1