diff options
author | John Levon <john.levon@nutanix.com> | 2021-03-31 16:49:52 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-03-31 16:49:52 +0100 |
commit | 14b237fc80473bf4419a2bccda4cf7e7d382c174 (patch) | |
tree | 959e0cc1c4d2557f624df6fa5c189a8cdc92993f /lib | |
parent | 0ae00cbb9edcc3879b1276cd61479d668a7f1ec9 (diff) | |
download | libvfio-user-14b237fc80473bf4419a2bccda4cf7e7d382c174.zip libvfio-user-14b237fc80473bf4419a2bccda4cf7e7d382c174.tar.gz libvfio-user-14b237fc80473bf4419a2bccda4cf7e7d382c174.tar.bz2 |
rework DMA callbacks (#396)
This fixes a number of issues with how DMA is handled, based on some changes by
Thanos Makatos:
- rename callbacks to register/unregister, as there is not necessarily
any mapping
- provide the (large) page-aligned mapped start and size, the page size used,
as well as the protection flags: some API users need these
- for convenience, provide the virtual address separately that corresponds to
the mapped region
- we should only require a DMA controller to use vfu_addr_to_sg(),
not an unregister callback
- the callbacks should return errno not -errno
- region removal was incorrectly updating the region array
- various other cleanups and clarifications
Signed-off-by: John Levon <john.levon@nutanix.com>
Reviewed-by: Thanos Makatos <thanos.makatos@nutanix.com>
Diffstat (limited to 'lib')
-rw-r--r-- | lib/dma.c | 276 | ||||
-rw-r--r-- | lib/dma.h | 108 | ||||
-rw-r--r-- | lib/irq.c | 4 | ||||
-rw-r--r-- | lib/libvfio-user.c | 107 | ||||
-rw-r--r-- | lib/private.h | 36 |
5 files changed, 253 insertions, 278 deletions
@@ -92,54 +92,53 @@ dma_controller_create(vfu_ctx_t *vfu_ctx, int max_regions) } void -MOCK_DEFINE(_dma_controller_do_remove_region)(dma_controller_t *dma, - dma_memory_region_t *region) +MOCK_DEFINE(dma_controller_unmap_region)(dma_controller_t *dma, + dma_memory_region_t *region) { int err; assert(dma != NULL); assert(region != NULL); - err = dma_unmap_region(region, region->virt_addr, region->size); + err = munmap(region->info.mapping.iov_base, region->info.mapping.iov_len); if (err != 0) { - vfu_log(dma->vfu_ctx, LOG_DEBUG, "failed to unmap fd=%d vaddr=%p-%p\n", - region->fd, region->virt_addr, - region->virt_addr + region->size - 1); + vfu_log(dma->vfu_ctx, LOG_DEBUG, "failed to unmap fd=%d " + "mapping=[%p, %p): %m\n", + region->fd, region->info.mapping.iov_base, + iov_end(®ion->info.mapping)); } - if (region->fd != -1) { - if (close(region->fd) == -1) { - vfu_log(dma->vfu_ctx, LOG_DEBUG, - "failed to close fd %d: %m\n", region->fd); - } + + assert(region->fd != -1); + + if (close(region->fd) == -1) { + vfu_log(dma->vfu_ctx, LOG_WARNING, "failed to close fd %d: %m\n", + region->fd); } } -/* - * FIXME no longer used. Also, it doesn't work for addresses that span two - * DMA regions. - */ -bool -dma_controller_region_valid(dma_controller_t *dma, dma_addr_t dma_addr, - size_t size) +static void +array_remove(void *array, size_t elem_size, size_t index, int *nr_elemsp) { - dma_memory_region_t *region; - int i; + void *dest; + void *src; + size_t nr; - for (i = 0; i < dma->nregions; i++) { - region = &dma->regions[i]; - if (dma_addr == region->dma_addr && size <= region->size) { - return true; - } - } + assert((size_t)*nr_elemsp > index); + + nr = *nr_elemsp - (index + 1); + dest = (char *)array + (index * elem_size); + src = (char *)array + ((index + 1) * elem_size); + + memmove(dest, src, nr * elem_size); - return false; + (*nr_elemsp)--; } /* FIXME not thread safe */ int MOCK_DEFINE(dma_controller_remove_region)(dma_controller_t *dma, - dma_addr_t dma_addr, size_t size, - vfu_unmap_dma_cb_t *unmap_dma, + vfu_dma_addr_t dma_addr, size_t size, + vfu_dma_unregister_cb_t *dma_unregister, void *data) { int idx; @@ -150,29 +149,29 @@ MOCK_DEFINE(dma_controller_remove_region)(dma_controller_t *dma, for (idx = 0; idx < dma->nregions; idx++) { region = &dma->regions[idx]; - if (region->dma_addr != dma_addr || region->size != size) { + if (region->info.iova.iov_base != dma_addr || + region->info.iova.iov_len != size) { continue; } - err = unmap_dma(data, region->dma_addr, region->size); + + err = dma_unregister(data, ®ion->info); if (err != 0) { vfu_log(dma->vfu_ctx, LOG_ERR, - "failed to notify of removal of DMA region %#lx-%#lx: %s\n", - region->dma_addr, region->dma_addr + region->size, - strerror(-err)); + "failed to dma_unregister() DMA region [%#lx, %#lx): %s\n", + region->info.iova.iov_base, iov_end(®ion->info.iova), + strerror(err)); return err; } + assert(region->refcnt == 0); - if (region->virt_addr != NULL) { - _dma_controller_do_remove_region(dma, region); + + if (region->info.vaddr != NULL) { + dma_controller_unmap_region(dma, region); + } else { + assert(region->fd == -1); } - if (dma->nregions > 1) - /* - * FIXME valgrind complains with 'Source and destination overlap in memcpy', - * check whether memmove eliminates this warning. - */ - memcpy(region, &dma->regions[dma->nregions - 1], - sizeof(*region)); - dma->nregions--; + + array_remove(&dma->regions, sizeof (*region), idx, &dma->nregions); return 0; } return -ENOENT; @@ -183,15 +182,22 @@ dma_controller_remove_regions(dma_controller_t *dma) { int i; - assert(dma); + assert(dma != NULL); for (i = 0; i < dma->nregions; i++) { dma_memory_region_t *region = &dma->regions[i]; - vfu_log(dma->vfu_ctx, LOG_INFO, "unmap vaddr=%p IOVA=%lx", - region->virt_addr, region->dma_addr); + vfu_log(dma->vfu_ctx, LOG_DEBUG, "removing DMA region " + "iova=[%#lx, %#lx) vaddr=%#lx mapping=[%#lx, %#lx)", + region->info.iova.iov_base, iov_end(®ion->info.iova), + region->info.vaddr, + region->info.mapping.iov_base, iov_end(®ion->info.mapping)); - _dma_controller_do_remove_region(dma, region); + if (region->info.vaddr != NULL) { + dma_controller_unmap_region(dma, region); + } else { + assert(region->fd == -1); + } } } @@ -206,26 +212,65 @@ dma_controller_destroy(dma_controller_t *dma) free(dma); } +static int +dma_map_region(dma_controller_t *dma, dma_memory_region_t *region) +{ + void *mmap_base; + size_t mmap_len; + off_t offset; + + offset = ROUND_DOWN(region->offset, region->info.page_size); + mmap_len = ROUND_UP(region->info.iova.iov_len, region->info.page_size); + + mmap_base = mmap(NULL, mmap_len, region->info.prot, MAP_SHARED, + region->fd, offset); + + if (mmap_base == MAP_FAILED) { + return -errno; + } + + // Do not dump. + madvise(mmap_base, mmap_len, MADV_DONTDUMP); + + region->info.mapping.iov_base = mmap_base; + region->info.mapping.iov_len = mmap_len; + region->info.vaddr = mmap_base + (region->offset - offset); + + vfu_log(dma->vfu_ctx, LOG_DEBUG, "mapped DMA region iova=[%#lx, %#lx) " + "vaddr=%#lx page_size=%#lx mapping=[%#lx, %#lx)", + region->info.iova.iov_base, iov_end(®ion->info.iova), + region->info.vaddr, region->info.page_size, + region->info.mapping.iov_base, iov_end(®ion->info.mapping)); + + + return 0; +} + int MOCK_DEFINE(dma_controller_add_region)(dma_controller_t *dma, - dma_addr_t dma_addr, size_t size, + vfu_dma_addr_t dma_addr, size_t size, int fd, off_t offset, uint32_t prot) { - int idx; dma_memory_region_t *region; int page_size = 0; + char rstr[1024]; + int idx; + int ret; assert(dma != NULL); + snprintf(rstr, sizeof(rstr), "[%p, %p) fd=%d offset=%#lx prot=%#x", + dma_addr, (char *)dma_addr + size, fd, offset, prot); + for (idx = 0; idx < dma->nregions; idx++) { region = &dma->regions[idx]; /* First check if this is the same exact region. */ - if (region->dma_addr == dma_addr && region->size == size) { + if (region->info.iova.iov_base == dma_addr && + region->info.iova.iov_len == size) { if (offset != region->offset) { - vfu_log(dma->vfu_ctx, LOG_ERR, - "bad offset for new DMA region %#lx-%#lx, want=%ld, existing=%ld\n", - dma_addr, dma_addr + size, offset, region->offset); + vfu_log(dma->vfu_ctx, LOG_ERR, "bad offset for new DMA region " + "%s; existing=%#lx\n", rstr, region->offset); goto err; } if (!fds_are_same_file(region->fd, fd)) { @@ -235,36 +280,34 @@ MOCK_DEFINE(dma_controller_add_region)(dma_controller_t *dma, * the same file, however in the majority of cases we'll be * using a single fd. */ - vfu_log(dma->vfu_ctx, LOG_ERR, - "bad fd=%d for new DMA region %#lx-%#lx, existing fd=%d\n", - fd, offset, offset + size - 1, region->fd); + vfu_log(dma->vfu_ctx, LOG_ERR, "bad fd for new DMA region %s; " + "existing=%d\n", rstr, region->fd); goto err; } - if (region->prot != prot) { - vfu_log(dma->vfu_ctx, LOG_ERR, "bad prot=%#x " - "for new DMA region %#lx-%#lx, existing prot=%#x\n", - prot, offset, offset + size - 1, region->prot); + if (region->info.prot != prot) { + vfu_log(dma->vfu_ctx, LOG_ERR, "bad prot for new DMA region " + "%s; existing=%#x", rstr, region->info.prot); goto err; } return idx; } /* Check for overlap, i.e. start of one region is within another. */ - if ((dma_addr >= region->dma_addr && - dma_addr < region->dma_addr + region->size) || - (region->dma_addr >= dma_addr && - region->dma_addr < dma_addr + size)) { - vfu_log(dma->vfu_ctx, LOG_INFO, - "new DMA region %#lx+%#lx overlaps with DMA region %#lx-%#lx\n", - dma_addr, size, region->dma_addr, region->size); + if ((dma_addr >= region->info.iova.iov_base && + dma_addr < iov_end(®ion->info.iova)) || + (region->info.iova.iov_base >= dma_addr && + region->info.iova.iov_base < dma_addr + size)) { + vfu_log(dma->vfu_ctx, LOG_INFO, "new DMA region %s overlaps with " + "DMA region [%#lx, %#lx)", rstr, region->info.iova.iov_base, + iov_end(®ion->info.iova)); goto err; } } if (dma->nregions == dma->max_regions) { idx = dma->max_regions; - vfu_log(dma->vfu_ctx, LOG_ERR, - "reached maxed regions, recompile with higher number of DMA regions\n"); + vfu_log(dma->vfu_ctx, LOG_ERR, "hit max regions %d\n", + dma->max_regions); goto err; } @@ -280,32 +323,31 @@ MOCK_DEFINE(dma_controller_add_region)(dma_controller_t *dma, } page_size = MAX(page_size, getpagesize()); - region->dma_addr = dma_addr; - region->size = size; - region->page_size = page_size; + memset(region, 0, sizeof (*region)); + + region->info.iova.iov_base = (void *)dma_addr; + region->info.iova.iov_len = size; + region->info.page_size = page_size; + region->info.prot = prot; region->offset = offset; - region->prot = prot; region->fd = fd; - region->refcnt = 0; if (fd != -1) { - region->virt_addr = dma_map_region(region, region->prot, 0, - region->size); - if (region->virt_addr == MAP_FAILED) { + ret = dma_map_region(dma, region); + + if (ret != 0) { vfu_log(dma->vfu_ctx, LOG_ERR, - "failed to memory map DMA region %#lx-%#lx: %s\n", - dma_addr, dma_addr + size, strerror(errno)); - if (region->fd != -1) { - if (close(region->fd) == -1) { - vfu_log(dma->vfu_ctx, LOG_DEBUG, - "failed to close fd %d: %m\n", region->fd); - } + "failed to memory map DMA region %s: %s\n", rstr, + strerror(-ret)); + + if (close(region->fd) == -1) { + vfu_log(dma->vfu_ctx, LOG_WARNING, + "failed to close fd %d: %m\n", region->fd); } goto err; } - } else { - region->virt_addr = NULL; } + dma->nregions++; return idx; @@ -314,53 +356,9 @@ err: return -idx - 1; } -static inline void -mmap_round(size_t *offset, size_t *size, size_t page_size) -{ - size_t offset_orig = *offset; - *offset = ROUND_DOWN(offset_orig, page_size); - *size = ROUND_UP(offset_orig + *size, page_size) - *offset; -} - -void * -MOCK_DEFINE(dma_map_region)(dma_memory_region_t *region, int prot, - size_t offset, size_t len) -{ - size_t mmap_offset, mmap_size = len; - char *mmap_base; - - if (offset >= region->size || offset + len > region->size) { - return MAP_FAILED; - } - - offset += region->offset; - mmap_offset = offset; - mmap_round(&mmap_offset, &mmap_size, region->page_size); - - // Do the mmap. - // Note: As per mmap(2) manpage, on some hardware architectures - // (e.g., i386), PROT_WRITE implies PROT_READ - mmap_base = mmap(NULL, mmap_size, prot, MAP_SHARED, - region->fd, mmap_offset); - if (mmap_base == MAP_FAILED) { - return mmap_base; - } - // Do not dump. - madvise(mmap_base, mmap_size, MADV_DONTDUMP); - - return mmap_base + (offset - mmap_offset); -} - -int -dma_unmap_region(dma_memory_region_t *region, void *virt_addr, size_t len) -{ - mmap_round((size_t *)&virt_addr, &len, region->page_size); - return munmap(virt_addr, len); -} - int _dma_addr_sg_split(const dma_controller_t *dma, - dma_addr_t dma_addr, uint32_t len, + vfu_dma_addr_t dma_addr, uint32_t len, dma_sg_t *sg, int max_sg, int prot) { int idx; @@ -371,9 +369,10 @@ _dma_addr_sg_split(const dma_controller_t *dma, found = false; for (idx = 0; idx < dma->nregions; idx++) { const dma_memory_region_t *const region = &dma->regions[idx]; - const dma_addr_t region_end = region->dma_addr + region->size; + vfu_dma_addr_t region_start = region->info.iova.iov_base; + vfu_dma_addr_t region_end = iov_end(®ion->info.iova); - while (dma_addr >= region->dma_addr && dma_addr < region_end) { + while (dma_addr >= region_start && dma_addr < region_end) { size_t region_len = MIN(region_end - dma_addr, len); if (cnt < max_sg) { @@ -443,7 +442,10 @@ int dma_controller_dirty_page_logging_start(dma_controller_t *dma, size_t pgsize for (i = 0; i < dma->nregions; i++) { dma_memory_region_t *region = &dma->regions[i]; - ssize_t bitmap_size = get_bitmap_size(region->size, pgsize); + ssize_t bitmap_size; + + bitmap_size = get_bitmap_size(region->info.iova.iov_len, pgsize); + if (bitmap_size < 0) { return bitmap_size; } @@ -481,8 +483,8 @@ int dma_controller_dirty_page_logging_stop(dma_controller_t *dma) } int -dma_controller_dirty_page_get(dma_controller_t *dma, dma_addr_t addr, int len, - size_t pgsize, size_t size, char **data) +dma_controller_dirty_page_get(dma_controller_t *dma, vfu_dma_addr_t addr, + int len, size_t pgsize, size_t size, char **data) { int ret; ssize_t bitmap_size; @@ -51,8 +51,6 @@ * list (sglist) of dma_sg_t from DMA addresses. Then the sglist * can be mapped using dma_map_sg() into the process's virtual address space * as an iovec for direct access, and unmapped using dma_unmap_sg() when done. - * - dma_map_addr() and dma_unmap_addr() helper functions are provided - * for mapping DMA regions that can fit into one scatter-gather entry. * Every region is mapped into the application's virtual address space * at registration time with R/W permissions. * dma_map_sg() ignores all protection bits and only does lookups and @@ -79,18 +77,14 @@ #include "libvfio-user.h" #include "common.h" +#define iov_end(iov) ((iov)->iov_base + (iov)->iov_len) + struct vfu_ctx; typedef struct { - dma_addr_t dma_addr; // DMA address of this region - uint32_t prot; // memory protection of the mapping - // defined in sys/mman.h - - size_t size; // Size of this region + vfu_dma_info_t info; int fd; // File descriptor to mmap - int page_size; // Page size of this fd off_t offset; // File offset - void *virt_addr; // Virtual address of this region int refcnt; // Number of users of this region char *dirty_bitmap; // Dirty page bitmap } dma_memory_region_t; @@ -117,20 +111,20 @@ dma_controller_destroy(dma_controller_t *dma); * (e.g. due to conflict with existing region). */ MOCK_DECLARE(int, dma_controller_add_region, dma_controller_t *dma, - dma_addr_t dma_addr, size_t size, int fd, off_t offset, + vfu_dma_addr_t dma_addr, size_t size, int fd, off_t offset, uint32_t prot); -MOCK_DECLARE(void, _dma_controller_do_remove_region, dma_controller_t *dma, - dma_memory_region_t *region); - MOCK_DECLARE(int, dma_controller_remove_region, dma_controller_t *dma, - dma_addr_t dma_addr, size_t size, vfu_unmap_dma_cb_t *unmap_dma, - void *data); + vfu_dma_addr_t dma_addr, size_t size, + vfu_dma_unregister_cb_t *dma_unregister, void *data); + +MOCK_DECLARE(void, dma_controller_unmap_region, dma_controller_t *dma, + dma_memory_region_t *region); // Helper for dma_addr_to_sg() slow path. int _dma_addr_sg_split(const dma_controller_t *dma, - dma_addr_t dma_addr, uint32_t len, + vfu_dma_addr_t dma_addr, uint32_t len, dma_sg_t *sg, int max_sg, int prot); static bool @@ -142,9 +136,9 @@ _dma_should_mark_dirty(const dma_controller_t *dma, int prot) } static size_t -_get_pgstart(size_t pgsize, uint64_t base_addr, uint64_t offset) +_get_pgstart(size_t pgsize, void *base_addr, uint64_t offset) { - return (offset - base_addr) / pgsize; + return (offset - (uint64_t)base_addr) / pgsize; } static size_t @@ -164,7 +158,7 @@ _dma_bitmap_get_pgrange(const dma_controller_t *dma, assert(start != NULL); assert(end != NULL); - *start = _get_pgstart(dma->dirty_pgsize, region->dma_addr, sg->offset); + *start = _get_pgstart(dma->dirty_pgsize, region->info.iova.iov_base, sg->offset); *end = _get_pgend(dma->dirty_pgsize, sg->length, *start); } @@ -186,24 +180,24 @@ _dma_mark_dirty(const dma_controller_t *dma, const dma_memory_region_t *region, } static inline int -dma_init_sg(const dma_controller_t *dma, dma_sg_t *sg, dma_addr_t dma_addr, +dma_init_sg(const dma_controller_t *dma, dma_sg_t *sg, vfu_dma_addr_t dma_addr, uint32_t len, int prot, int region_index) { const dma_memory_region_t *const region = &dma->regions[region_index]; - if ((prot & PROT_WRITE) && !(region->prot & PROT_WRITE)) { + if ((prot & PROT_WRITE) && !(region->info.prot & PROT_WRITE)) { errno = EACCES; return -1; } - sg->dma_addr = region->dma_addr; + sg->dma_addr = region->info.iova.iov_base; sg->region = region_index; - sg->offset = dma_addr - region->dma_addr; + sg->offset = dma_addr - region->info.iova.iov_base; sg->length = len; if (_dma_should_mark_dirty(dma, prot)) { _dma_mark_dirty(dma, region, sg); } - sg->mappable = region->virt_addr != NULL; + sg->mappable = (region->info.vaddr != NULL); return 0; } @@ -223,18 +217,19 @@ dma_init_sg(const dma_controller_t *dma, dma_sg_t *sg, dma_addr_t dma_addr, */ static inline int dma_addr_to_sg(const dma_controller_t *dma, - dma_addr_t dma_addr, uint32_t len, + vfu_dma_addr_t dma_addr, size_t len, dma_sg_t *sg, int max_sg, int prot) { static __thread int region_hint; int cnt, ret; const dma_memory_region_t *const region = &dma->regions[region_hint]; - const dma_addr_t region_end = region->dma_addr + region->size; + const void *region_end = iov_end(®ion->info.iova); // Fast path: single region. if (likely(max_sg > 0 && len > 0 && - dma_addr >= region->dma_addr && dma_addr + len <= region_end && + dma_addr >= region->info.iova.iov_base && + dma_addr + len <= region_end && region_hint < dma->nregions)) { ret = dma_init_sg(dma, sg, dma_addr, len, prot, region_hint); if (ret < 0) { @@ -251,12 +246,6 @@ dma_addr_to_sg(const dma_controller_t *dma, return cnt; } -MOCK_DECLARE(void *, dma_map_region, dma_memory_region_t *region, int prot, - size_t offset, size_t len); - -int -dma_unmap_region(dma_memory_region_t *region, void *virt_addr, size_t len); - static inline int dma_map_sg(dma_controller_t *dma, const dma_sg_t *sg, struct iovec *iov, int cnt) @@ -273,12 +262,15 @@ dma_map_sg(dma_controller_t *dma, const dma_sg_t *sg, struct iovec *iov, return -EINVAL; } region = &dma->regions[sg[i].region]; - if (region->virt_addr == NULL) { + + if (region->info.vaddr == NULL) { return -EFAULT; } + vfu_log(dma->vfu_ctx, LOG_DEBUG, "map %#lx-%#lx\n", - sg->dma_addr + sg->offset, sg->dma_addr + sg->offset + sg->length); - iov[i].iov_base = region->virt_addr + sg[i].offset; + sg->dma_addr + sg->offset, + sg->dma_addr + sg->offset + sg->length); + iov[i].iov_base = region->info.vaddr + sg[i].offset; iov[i].iov_len = sg[i].length; region->refcnt++; } @@ -299,7 +291,8 @@ dma_unmap_sg(dma_controller_t *dma, const dma_sg_t *sg, * tfind(3) */ for (r = dma->regions; - r < dma->regions + dma->nregions && r->dma_addr != sg[i].dma_addr; + r < dma->regions + dma->nregions && + r->info.iova.iov_base != sg[i].dma_addr; r++); if (r > dma->regions + dma->nregions) { /* bad region */ @@ -312,39 +305,6 @@ dma_unmap_sg(dma_controller_t *dma, const dma_sg_t *sg, return; } -static inline void * -dma_map_addr(dma_controller_t *dma, dma_addr_t dma_addr, uint32_t len, int prot) -{ - dma_sg_t sg; - struct iovec iov; - - if (dma_addr_to_sg(dma, dma_addr, len, &sg, 1, prot) == 1 && - dma_map_sg(dma, &sg, &iov, 1) == 0) { - return iov.iov_base; - } - - return NULL; -} - -static inline void -dma_unmap_addr(dma_controller_t *dma, - dma_addr_t dma_addr, uint32_t len, void *addr) -{ - dma_sg_t sg; - struct iovec iov = { - .iov_base = addr, - .iov_len = len, - }; - int r; - - r = dma_addr_to_sg(dma, dma_addr, len, &sg, 1, PROT_NONE); - if (r != 1) { - assert(false); - } - - dma_unmap_sg(dma, &sg, &iov, 1); -} - int dma_controller_dirty_page_logging_start(dma_controller_t *dma, size_t pgsize); @@ -352,12 +312,8 @@ int dma_controller_dirty_page_logging_stop(dma_controller_t *dma); int -dma_controller_dirty_page_get(dma_controller_t *dma, dma_addr_t addr, int len, - size_t pgsize, size_t size, char **data); - -bool -dma_controller_region_valid(dma_controller_t *dma, dma_addr_t dma_addr, - size_t size); +dma_controller_dirty_page_get(dma_controller_t *dma, vfu_dma_addr_t addr, + int len, size_t pgsize, size_t size, char **data); #endif /* LIB_VFIO_USER_DMA_H */ @@ -106,7 +106,7 @@ irqs_disable(vfu_ctx_t *vfu_ctx, uint32_t index, uint32_t start, uint32_t count) count = vfu_ctx->irq_count[index]; } - vfu_log(vfu_ctx, LOG_DEBUG, "disabling IRQ type %s range [%u-%u)", + vfu_log(vfu_ctx, LOG_DEBUG, "disabling IRQ type %s range [%u, %u)", vfio_irq_idx_to_str(index), start, start + count); switch (index) { @@ -335,7 +335,7 @@ handle_device_set_irqs(vfu_ctx_t *vfu_ctx, uint32_t size, irq_set->start, irq_set->count); } - vfu_log(vfu_ctx, LOG_DEBUG, "setting IRQ %s flags=%#x range [%u-%u)", + vfu_log(vfu_ctx, LOG_DEBUG, "setting IRQ %s flags=%#x range [%u, %u)", vfio_irq_idx_to_str(irq_set->index), irq_set->flags, irq_set->start, irq_set->start + irq_set->count); diff --git a/lib/libvfio-user.c b/lib/libvfio-user.c index 40a23b8..ebd70af 100644 --- a/lib/libvfio-user.c +++ b/lib/libvfio-user.c @@ -56,6 +56,7 @@ #include "private.h" #include "tran_sock.h" + void vfu_log(vfu_ctx_t *vfu_ctx, int level, const char *fmt, ...) { @@ -152,8 +153,8 @@ dev_get_caps(vfu_ctx_t *vfu_ctx, vfu_reg_info_t *vfu_reg, bool is_migr_reg, for (i = 0; i < nr_mmap_areas; i++) { struct iovec *iov = &vfu_reg->mmap_areas[i]; - vfu_log(vfu_ctx, LOG_DEBUG, "%s: area %d [%#llx-%#llx)", __func__, - i, iov->iov_base, iov->iov_base + iov->iov_len); + vfu_log(vfu_ctx, LOG_DEBUG, "%s: area %d [%#llx, %#llx)", __func__, + i, iov->iov_base, iov_end(iov)); (*fds)[i] = vfu_reg->fd; sparse->areas[i].offset = (uintptr_t)iov->iov_base; @@ -487,7 +488,7 @@ handle_dma_map_or_unmap(vfu_ctx_t *vfu_ctx, uint32_t size, bool map, size_t fdi; assert(vfu_ctx != NULL); - assert(fds != NULL); /* TODO assert valid only for map */ + assert(nr_fds == 0 || fds != NULL); if (vfu_ctx->dma == NULL) { return 0; @@ -504,61 +505,57 @@ handle_dma_map_or_unmap(vfu_ctx_t *vfu_ctx, uint32_t size, bool map, } for (i = 0, fdi = 0; i < nr_dma_regions; i++) { + struct vfio_user_dma_region *region = &dma_regions[i]; + char rstr[1024]; + + snprintf(rstr, sizeof(rstr), "[%#lx, %#lx) offset=%#lx " + "prot=%#x flags=%#x", region->addr, region->addr + region->size, + region->offset, region->prot, region->flags); + + vfu_log(vfu_ctx, LOG_DEBUG, "%s DMA region %s", + map ? "adding" : "removing", rstr); + if (map) { int fd = -1; - if (dma_regions[i].flags == VFIO_USER_F_DMA_REGION_MAPPABLE) { + if (region->flags == VFIO_USER_F_DMA_REGION_MAPPABLE) { fd = consume_fd(fds, nr_fds, fdi++); if (fd < 0) { + vfu_log(vfu_ctx, LOG_ERR, "failed to add DMA region %s: " + "mappable but fd not provided", rstr); ret = fd; break; } } - ret = dma_controller_add_region(vfu_ctx->dma, - dma_regions[i].addr, - dma_regions[i].size, - fd, - dma_regions[i].offset, - dma_regions[i].prot); + ret = dma_controller_add_region(vfu_ctx->dma, (void *)region->addr, + region->size, fd, region->offset, + region->prot); if (ret < 0) { if (fd != -1) { close(fd); } - vfu_log(vfu_ctx, LOG_INFO, - "failed to add DMA region %#lx-%#lx offset=%#lx fd=%d: %s", - dma_regions[i].addr, - dma_regions[i].addr + dma_regions[i].size - 1, - dma_regions[i].offset, fd, - strerror(-ret)); + vfu_log(vfu_ctx, LOG_ERR, "failed to add DMA region %s: %s", + rstr, strerror(-ret)); break; } - if (vfu_ctx->map_dma != NULL) { - vfu_ctx->map_dma(vfu_ctx, dma_regions[i].addr, - dma_regions[i].size, dma_regions[i].prot); + + if (vfu_ctx->dma_register != NULL) { + vfu_ctx->dma_register(vfu_ctx, + &vfu_ctx->dma->regions[ret].info); } + ret = 0; - vfu_log(vfu_ctx, LOG_DEBUG, - "added DMA region %#lx-%#lx offset=%#lx fd=%d prot=%#x", - dma_regions[i].addr, - dma_regions[i].addr + dma_regions[i].size - 1, - dma_regions[i].offset, fd, dma_regions[i].prot); } else { ret = dma_controller_remove_region(vfu_ctx->dma, - dma_regions[i].addr, - dma_regions[i].size, - vfu_ctx->unmap_dma, vfu_ctx); + (void *)region->addr, + region->size, + vfu_ctx->dma_unregister, + vfu_ctx); if (ret < 0) { - vfu_log(vfu_ctx, LOG_INFO, - "failed to remove DMA region %#lx-%#lx: %s", - dma_regions[i].addr, - dma_regions[i].addr + dma_regions[i].size - 1, - strerror(-ret)); + vfu_log(vfu_ctx, LOG_ERR, "failed to remove DMA region %s: %s", + rstr, strerror(-ret)); break; } - vfu_log(vfu_ctx, LOG_DEBUG, - "removed DMA region %#lx-%#lx", - dma_regions[i].addr, - dma_regions[i].addr + dma_regions[i].size - 1); } } return ret; @@ -599,8 +596,10 @@ handle_dirty_pages_get(vfu_ctx_t *vfu_ctx, for (i = 1; i < *nr_iovecs; i++) { struct vfio_iommu_type1_dirty_bitmap_get *r = &ranges[(i - 1)]; /* FIXME ugly indexing */ - ret = dma_controller_dirty_page_get(vfu_ctx->dma, r->iova, r->size, - r->bitmap.pgsize, r->bitmap.size, + ret = dma_controller_dirty_page_get(vfu_ctx->dma, + (vfu_dma_addr_t)r->iova, + r->size, r->bitmap.pgsize, + r->bitmap.size, (char**)&((*iovecs)[i].iov_base)); if (ret != 0) { goto out; @@ -1327,7 +1326,7 @@ vfu_setup_region(vfu_ctx_t *vfu_ctx, int region_idx, size_t size, for (i = 0; i < nr_mmap_areas; i++) { struct iovec *iov = &mmap_areas[i]; - if ((size_t)iov->iov_base + iov->iov_len > size) { + if ((size_t)iov_end(iov) > size) { return ERROR_INT(EINVAL); } } @@ -1385,8 +1384,8 @@ vfu_setup_device_reset_cb(vfu_ctx_t *vfu_ctx, vfu_reset_cb_t *reset) } int -vfu_setup_device_dma_cb(vfu_ctx_t *vfu_ctx, vfu_map_dma_cb_t *map_dma, - vfu_unmap_dma_cb_t *unmap_dma) +vfu_setup_device_dma(vfu_ctx_t *vfu_ctx, vfu_dma_register_cb_t *dma_register, + vfu_dma_unregister_cb_t *dma_unregister) { assert(vfu_ctx != NULL); @@ -1397,8 +1396,8 @@ vfu_setup_device_dma_cb(vfu_ctx_t *vfu_ctx, vfu_map_dma_cb_t *map_dma, return ERROR_INT(ENOMEM); } - vfu_ctx->map_dma = map_dma; - vfu_ctx->unmap_dma = unmap_dma; + vfu_ctx->dma_register = dma_register; + vfu_ctx->dma_unregister = dma_unregister; return 0; } @@ -1450,33 +1449,33 @@ vfu_setup_device_migration_callbacks(vfu_ctx_t *vfu_ctx, return 0; } -inline vfu_reg_info_t * +vfu_reg_info_t * vfu_get_region_info(vfu_ctx_t *vfu_ctx) { assert(vfu_ctx != NULL); return vfu_ctx->reg_info; } -inline int -vfu_addr_to_sg(vfu_ctx_t *vfu_ctx, dma_addr_t dma_addr, - uint32_t len, dma_sg_t *sg, int max_sg, int prot) +int +vfu_addr_to_sg(vfu_ctx_t *vfu_ctx, vfu_dma_addr_t dma_addr, + size_t len, dma_sg_t *sg, int max_sg, int prot) { assert(vfu_ctx != NULL); - if (unlikely(vfu_ctx->unmap_dma == NULL)) { + if (unlikely(vfu_ctx->dma == NULL)) { return ERROR_INT(EINVAL); } return dma_addr_to_sg(vfu_ctx->dma, dma_addr, len, sg, max_sg, prot); } -inline int +int vfu_map_sg(vfu_ctx_t *vfu_ctx, const dma_sg_t *sg, struct iovec *iov, int cnt) { int ret; - if (unlikely(vfu_ctx->unmap_dma == NULL)) { + if (unlikely(vfu_ctx->dma_unregister == NULL)) { return ERROR_INT(EINVAL); } @@ -1488,10 +1487,10 @@ vfu_map_sg(vfu_ctx_t *vfu_ctx, const dma_sg_t *sg, return 0; } -inline void +void vfu_unmap_sg(vfu_ctx_t *vfu_ctx, const dma_sg_t *sg, struct iovec *iov, int cnt) { - if (unlikely(vfu_ctx->unmap_dma == NULL)) { + if (unlikely(vfu_ctx->dma_unregister == NULL)) { return; } return dma_unmap_sg(vfu_ctx->dma, sg, iov, cnt); @@ -1515,7 +1514,7 @@ vfu_dma_read(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, void *data) return ERROR_INT(ENOMEM); } - dma_send.addr = sg->dma_addr; + dma_send.addr = (uint64_t)sg->dma_addr; dma_send.count = sg->length; ret = vfu_ctx->tran->send_msg(vfu_ctx, msg_id, VFIO_USER_DMA_READ, &dma_send, sizeof(dma_send), NULL, @@ -1540,7 +1539,7 @@ vfu_dma_write(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, void *data) if (dma_send == NULL) { return ERROR_INT(ENOMEM); } - dma_send->addr = sg->dma_addr; + dma_send->addr = (uint64_t)sg->dma_addr; dma_send->count = sg->length; memcpy(dma_send->data, data, sg->length); /* FIXME no need to copy! */ ret = vfu_ctx->tran->send_msg(vfu_ctx, msg_id, VFIO_USER_DMA_WRITE, diff --git a/lib/private.h b/lib/private.h index c463fea..49f9152 100644 --- a/lib/private.h +++ b/lib/private.h @@ -122,8 +122,8 @@ struct vfu_ctx { void *tran_data; uint64_t flags; char *uuid; - vfu_map_dma_cb_t *map_dma; - vfu_unmap_dma_cb_t *unmap_dma; + vfu_dma_register_cb_t *dma_register; + vfu_dma_unregister_cb_t *dma_unregister; int client_max_fds; @@ -139,26 +139,26 @@ struct vfu_ctx { void dump_buffer(const char *prefix, const char *buf, uint32_t count); +int +consume_fd(int *fds, size_t nr_fds, size_t index); + vfu_reg_info_t * vfu_get_region_info(vfu_ctx_t *vfu_ctx); +long +dev_get_reginfo(vfu_ctx_t *vfu_ctx, uint32_t index, uint32_t argsz, + struct vfio_region_info **vfio_reg, int **fds, size_t *nr_fds); + int handle_dma_map_or_unmap(vfu_ctx_t *vfu_ctx, uint32_t size, bool map, int *fds, size_t nr_fds, struct vfio_user_dma_region *dma_regions); int -consume_fd(int *fds, size_t nr_fds, size_t index); - -int handle_device_get_info(vfu_ctx_t *vfu_ctx, uint32_t size, struct vfio_device_info *in_dev_info, struct vfio_device_info *out_dev_info); -long -dev_get_reginfo(vfu_ctx_t *vfu_ctx, uint32_t index, uint32_t argsz, - struct vfio_region_info **vfio_reg, int **fds, size_t *nr_fds); - int handle_device_set_irqs(vfu_ctx_t *vfu_ctx, uint32_t size, int *fds, size_t nr_fds, struct vfio_irq_set *irq_set); @@ -181,6 +181,24 @@ MOCK_DECLARE(int, handle_dirty_pages, vfu_ctx_t *vfu_ctx, uint32_t size, struct iovec **iovecs, size_t *nr_iovecs, struct vfio_iommu_type1_dirty_bitmap *dirty_bitmap); +MOCK_DECLARE(bool, should_exec_command, vfu_ctx_t *vfu_ctx, uint16_t cmd); + +MOCK_DECLARE(bool, cmd_allowed_when_stopped_and_copying, uint16_t cmd); + +MOCK_DECLARE(int, get_next_command, vfu_ctx_t *vfu_ctx, + struct vfio_user_header *hdr, int *fds, size_t *nr_fds); + +MOCK_DECLARE(int, exec_command, vfu_ctx_t *vfu_ctx, + struct vfio_user_header *hdr, size_t size, int *fds, size_t nr_fds, + int **fds_out, size_t *nr_fds_out, struct iovec *_iovecs, + struct iovec **iovecs, size_t *nr_iovecs, bool *free_iovec_data); + +MOCK_DECLARE(int, process_request, vfu_ctx_t *vfu_ctx); + +MOCK_DECLARE(int, handle_dirty_pages, vfu_ctx_t *vfu_ctx, uint32_t size, + struct iovec **iovecs, size_t *nr_iovecs, + struct vfio_iommu_type1_dirty_bitmap *dirty_bitmap); + #endif /* LIB_VFIO_USER_PRIVATE_H */ /* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ |