aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorJohn Levon <john.levon@nutanix.com>2021-03-31 16:49:52 +0100
committerGitHub <noreply@github.com>2021-03-31 16:49:52 +0100
commit14b237fc80473bf4419a2bccda4cf7e7d382c174 (patch)
tree959e0cc1c4d2557f624df6fa5c189a8cdc92993f /lib
parent0ae00cbb9edcc3879b1276cd61479d668a7f1ec9 (diff)
downloadlibvfio-user-14b237fc80473bf4419a2bccda4cf7e7d382c174.zip
libvfio-user-14b237fc80473bf4419a2bccda4cf7e7d382c174.tar.gz
libvfio-user-14b237fc80473bf4419a2bccda4cf7e7d382c174.tar.bz2
rework DMA callbacks (#396)
This fixes a number of issues with how DMA is handled, based on some changes by Thanos Makatos: - rename callbacks to register/unregister, as there is not necessarily any mapping - provide the (large) page-aligned mapped start and size, the page size used, as well as the protection flags: some API users need these - for convenience, provide the virtual address separately that corresponds to the mapped region - we should only require a DMA controller to use vfu_addr_to_sg(), not an unregister callback - the callbacks should return errno not -errno - region removal was incorrectly updating the region array - various other cleanups and clarifications Signed-off-by: John Levon <john.levon@nutanix.com> Reviewed-by: Thanos Makatos <thanos.makatos@nutanix.com>
Diffstat (limited to 'lib')
-rw-r--r--lib/dma.c276
-rw-r--r--lib/dma.h108
-rw-r--r--lib/irq.c4
-rw-r--r--lib/libvfio-user.c107
-rw-r--r--lib/private.h36
5 files changed, 253 insertions, 278 deletions
diff --git a/lib/dma.c b/lib/dma.c
index c06d666..d3db436 100644
--- a/lib/dma.c
+++ b/lib/dma.c
@@ -92,54 +92,53 @@ dma_controller_create(vfu_ctx_t *vfu_ctx, int max_regions)
}
void
-MOCK_DEFINE(_dma_controller_do_remove_region)(dma_controller_t *dma,
- dma_memory_region_t *region)
+MOCK_DEFINE(dma_controller_unmap_region)(dma_controller_t *dma,
+ dma_memory_region_t *region)
{
int err;
assert(dma != NULL);
assert(region != NULL);
- err = dma_unmap_region(region, region->virt_addr, region->size);
+ err = munmap(region->info.mapping.iov_base, region->info.mapping.iov_len);
if (err != 0) {
- vfu_log(dma->vfu_ctx, LOG_DEBUG, "failed to unmap fd=%d vaddr=%p-%p\n",
- region->fd, region->virt_addr,
- region->virt_addr + region->size - 1);
+ vfu_log(dma->vfu_ctx, LOG_DEBUG, "failed to unmap fd=%d "
+ "mapping=[%p, %p): %m\n",
+ region->fd, region->info.mapping.iov_base,
+ iov_end(&region->info.mapping));
}
- if (region->fd != -1) {
- if (close(region->fd) == -1) {
- vfu_log(dma->vfu_ctx, LOG_DEBUG,
- "failed to close fd %d: %m\n", region->fd);
- }
+
+ assert(region->fd != -1);
+
+ if (close(region->fd) == -1) {
+ vfu_log(dma->vfu_ctx, LOG_WARNING, "failed to close fd %d: %m\n",
+ region->fd);
}
}
-/*
- * FIXME no longer used. Also, it doesn't work for addresses that span two
- * DMA regions.
- */
-bool
-dma_controller_region_valid(dma_controller_t *dma, dma_addr_t dma_addr,
- size_t size)
+static void
+array_remove(void *array, size_t elem_size, size_t index, int *nr_elemsp)
{
- dma_memory_region_t *region;
- int i;
+ void *dest;
+ void *src;
+ size_t nr;
- for (i = 0; i < dma->nregions; i++) {
- region = &dma->regions[i];
- if (dma_addr == region->dma_addr && size <= region->size) {
- return true;
- }
- }
+ assert((size_t)*nr_elemsp > index);
+
+ nr = *nr_elemsp - (index + 1);
+ dest = (char *)array + (index * elem_size);
+ src = (char *)array + ((index + 1) * elem_size);
+
+ memmove(dest, src, nr * elem_size);
- return false;
+ (*nr_elemsp)--;
}
/* FIXME not thread safe */
int
MOCK_DEFINE(dma_controller_remove_region)(dma_controller_t *dma,
- dma_addr_t dma_addr, size_t size,
- vfu_unmap_dma_cb_t *unmap_dma,
+ vfu_dma_addr_t dma_addr, size_t size,
+ vfu_dma_unregister_cb_t *dma_unregister,
void *data)
{
int idx;
@@ -150,29 +149,29 @@ MOCK_DEFINE(dma_controller_remove_region)(dma_controller_t *dma,
for (idx = 0; idx < dma->nregions; idx++) {
region = &dma->regions[idx];
- if (region->dma_addr != dma_addr || region->size != size) {
+ if (region->info.iova.iov_base != dma_addr ||
+ region->info.iova.iov_len != size) {
continue;
}
- err = unmap_dma(data, region->dma_addr, region->size);
+
+ err = dma_unregister(data, &region->info);
if (err != 0) {
vfu_log(dma->vfu_ctx, LOG_ERR,
- "failed to notify of removal of DMA region %#lx-%#lx: %s\n",
- region->dma_addr, region->dma_addr + region->size,
- strerror(-err));
+ "failed to dma_unregister() DMA region [%#lx, %#lx): %s\n",
+ region->info.iova.iov_base, iov_end(&region->info.iova),
+ strerror(err));
return err;
}
+
assert(region->refcnt == 0);
- if (region->virt_addr != NULL) {
- _dma_controller_do_remove_region(dma, region);
+
+ if (region->info.vaddr != NULL) {
+ dma_controller_unmap_region(dma, region);
+ } else {
+ assert(region->fd == -1);
}
- if (dma->nregions > 1)
- /*
- * FIXME valgrind complains with 'Source and destination overlap in memcpy',
- * check whether memmove eliminates this warning.
- */
- memcpy(region, &dma->regions[dma->nregions - 1],
- sizeof(*region));
- dma->nregions--;
+
+ array_remove(&dma->regions, sizeof (*region), idx, &dma->nregions);
return 0;
}
return -ENOENT;
@@ -183,15 +182,22 @@ dma_controller_remove_regions(dma_controller_t *dma)
{
int i;
- assert(dma);
+ assert(dma != NULL);
for (i = 0; i < dma->nregions; i++) {
dma_memory_region_t *region = &dma->regions[i];
- vfu_log(dma->vfu_ctx, LOG_INFO, "unmap vaddr=%p IOVA=%lx",
- region->virt_addr, region->dma_addr);
+ vfu_log(dma->vfu_ctx, LOG_DEBUG, "removing DMA region "
+ "iova=[%#lx, %#lx) vaddr=%#lx mapping=[%#lx, %#lx)",
+ region->info.iova.iov_base, iov_end(&region->info.iova),
+ region->info.vaddr,
+ region->info.mapping.iov_base, iov_end(&region->info.mapping));
- _dma_controller_do_remove_region(dma, region);
+ if (region->info.vaddr != NULL) {
+ dma_controller_unmap_region(dma, region);
+ } else {
+ assert(region->fd == -1);
+ }
}
}
@@ -206,26 +212,65 @@ dma_controller_destroy(dma_controller_t *dma)
free(dma);
}
+static int
+dma_map_region(dma_controller_t *dma, dma_memory_region_t *region)
+{
+ void *mmap_base;
+ size_t mmap_len;
+ off_t offset;
+
+ offset = ROUND_DOWN(region->offset, region->info.page_size);
+ mmap_len = ROUND_UP(region->info.iova.iov_len, region->info.page_size);
+
+ mmap_base = mmap(NULL, mmap_len, region->info.prot, MAP_SHARED,
+ region->fd, offset);
+
+ if (mmap_base == MAP_FAILED) {
+ return -errno;
+ }
+
+ // Do not dump.
+ madvise(mmap_base, mmap_len, MADV_DONTDUMP);
+
+ region->info.mapping.iov_base = mmap_base;
+ region->info.mapping.iov_len = mmap_len;
+ region->info.vaddr = mmap_base + (region->offset - offset);
+
+ vfu_log(dma->vfu_ctx, LOG_DEBUG, "mapped DMA region iova=[%#lx, %#lx) "
+ "vaddr=%#lx page_size=%#lx mapping=[%#lx, %#lx)",
+ region->info.iova.iov_base, iov_end(&region->info.iova),
+ region->info.vaddr, region->info.page_size,
+ region->info.mapping.iov_base, iov_end(&region->info.mapping));
+
+
+ return 0;
+}
+
int
MOCK_DEFINE(dma_controller_add_region)(dma_controller_t *dma,
- dma_addr_t dma_addr, size_t size,
+ vfu_dma_addr_t dma_addr, size_t size,
int fd, off_t offset, uint32_t prot)
{
- int idx;
dma_memory_region_t *region;
int page_size = 0;
+ char rstr[1024];
+ int idx;
+ int ret;
assert(dma != NULL);
+ snprintf(rstr, sizeof(rstr), "[%p, %p) fd=%d offset=%#lx prot=%#x",
+ dma_addr, (char *)dma_addr + size, fd, offset, prot);
+
for (idx = 0; idx < dma->nregions; idx++) {
region = &dma->regions[idx];
/* First check if this is the same exact region. */
- if (region->dma_addr == dma_addr && region->size == size) {
+ if (region->info.iova.iov_base == dma_addr &&
+ region->info.iova.iov_len == size) {
if (offset != region->offset) {
- vfu_log(dma->vfu_ctx, LOG_ERR,
- "bad offset for new DMA region %#lx-%#lx, want=%ld, existing=%ld\n",
- dma_addr, dma_addr + size, offset, region->offset);
+ vfu_log(dma->vfu_ctx, LOG_ERR, "bad offset for new DMA region "
+ "%s; existing=%#lx\n", rstr, region->offset);
goto err;
}
if (!fds_are_same_file(region->fd, fd)) {
@@ -235,36 +280,34 @@ MOCK_DEFINE(dma_controller_add_region)(dma_controller_t *dma,
* the same file, however in the majority of cases we'll be
* using a single fd.
*/
- vfu_log(dma->vfu_ctx, LOG_ERR,
- "bad fd=%d for new DMA region %#lx-%#lx, existing fd=%d\n",
- fd, offset, offset + size - 1, region->fd);
+ vfu_log(dma->vfu_ctx, LOG_ERR, "bad fd for new DMA region %s; "
+ "existing=%d\n", rstr, region->fd);
goto err;
}
- if (region->prot != prot) {
- vfu_log(dma->vfu_ctx, LOG_ERR, "bad prot=%#x "
- "for new DMA region %#lx-%#lx, existing prot=%#x\n",
- prot, offset, offset + size - 1, region->prot);
+ if (region->info.prot != prot) {
+ vfu_log(dma->vfu_ctx, LOG_ERR, "bad prot for new DMA region "
+ "%s; existing=%#x", rstr, region->info.prot);
goto err;
}
return idx;
}
/* Check for overlap, i.e. start of one region is within another. */
- if ((dma_addr >= region->dma_addr &&
- dma_addr < region->dma_addr + region->size) ||
- (region->dma_addr >= dma_addr &&
- region->dma_addr < dma_addr + size)) {
- vfu_log(dma->vfu_ctx, LOG_INFO,
- "new DMA region %#lx+%#lx overlaps with DMA region %#lx-%#lx\n",
- dma_addr, size, region->dma_addr, region->size);
+ if ((dma_addr >= region->info.iova.iov_base &&
+ dma_addr < iov_end(&region->info.iova)) ||
+ (region->info.iova.iov_base >= dma_addr &&
+ region->info.iova.iov_base < dma_addr + size)) {
+ vfu_log(dma->vfu_ctx, LOG_INFO, "new DMA region %s overlaps with "
+ "DMA region [%#lx, %#lx)", rstr, region->info.iova.iov_base,
+ iov_end(&region->info.iova));
goto err;
}
}
if (dma->nregions == dma->max_regions) {
idx = dma->max_regions;
- vfu_log(dma->vfu_ctx, LOG_ERR,
- "reached maxed regions, recompile with higher number of DMA regions\n");
+ vfu_log(dma->vfu_ctx, LOG_ERR, "hit max regions %d\n",
+ dma->max_regions);
goto err;
}
@@ -280,32 +323,31 @@ MOCK_DEFINE(dma_controller_add_region)(dma_controller_t *dma,
}
page_size = MAX(page_size, getpagesize());
- region->dma_addr = dma_addr;
- region->size = size;
- region->page_size = page_size;
+ memset(region, 0, sizeof (*region));
+
+ region->info.iova.iov_base = (void *)dma_addr;
+ region->info.iova.iov_len = size;
+ region->info.page_size = page_size;
+ region->info.prot = prot;
region->offset = offset;
- region->prot = prot;
region->fd = fd;
- region->refcnt = 0;
if (fd != -1) {
- region->virt_addr = dma_map_region(region, region->prot, 0,
- region->size);
- if (region->virt_addr == MAP_FAILED) {
+ ret = dma_map_region(dma, region);
+
+ if (ret != 0) {
vfu_log(dma->vfu_ctx, LOG_ERR,
- "failed to memory map DMA region %#lx-%#lx: %s\n",
- dma_addr, dma_addr + size, strerror(errno));
- if (region->fd != -1) {
- if (close(region->fd) == -1) {
- vfu_log(dma->vfu_ctx, LOG_DEBUG,
- "failed to close fd %d: %m\n", region->fd);
- }
+ "failed to memory map DMA region %s: %s\n", rstr,
+ strerror(-ret));
+
+ if (close(region->fd) == -1) {
+ vfu_log(dma->vfu_ctx, LOG_WARNING,
+ "failed to close fd %d: %m\n", region->fd);
}
goto err;
}
- } else {
- region->virt_addr = NULL;
}
+
dma->nregions++;
return idx;
@@ -314,53 +356,9 @@ err:
return -idx - 1;
}
-static inline void
-mmap_round(size_t *offset, size_t *size, size_t page_size)
-{
- size_t offset_orig = *offset;
- *offset = ROUND_DOWN(offset_orig, page_size);
- *size = ROUND_UP(offset_orig + *size, page_size) - *offset;
-}
-
-void *
-MOCK_DEFINE(dma_map_region)(dma_memory_region_t *region, int prot,
- size_t offset, size_t len)
-{
- size_t mmap_offset, mmap_size = len;
- char *mmap_base;
-
- if (offset >= region->size || offset + len > region->size) {
- return MAP_FAILED;
- }
-
- offset += region->offset;
- mmap_offset = offset;
- mmap_round(&mmap_offset, &mmap_size, region->page_size);
-
- // Do the mmap.
- // Note: As per mmap(2) manpage, on some hardware architectures
- // (e.g., i386), PROT_WRITE implies PROT_READ
- mmap_base = mmap(NULL, mmap_size, prot, MAP_SHARED,
- region->fd, mmap_offset);
- if (mmap_base == MAP_FAILED) {
- return mmap_base;
- }
- // Do not dump.
- madvise(mmap_base, mmap_size, MADV_DONTDUMP);
-
- return mmap_base + (offset - mmap_offset);
-}
-
-int
-dma_unmap_region(dma_memory_region_t *region, void *virt_addr, size_t len)
-{
- mmap_round((size_t *)&virt_addr, &len, region->page_size);
- return munmap(virt_addr, len);
-}
-
int
_dma_addr_sg_split(const dma_controller_t *dma,
- dma_addr_t dma_addr, uint32_t len,
+ vfu_dma_addr_t dma_addr, uint32_t len,
dma_sg_t *sg, int max_sg, int prot)
{
int idx;
@@ -371,9 +369,10 @@ _dma_addr_sg_split(const dma_controller_t *dma,
found = false;
for (idx = 0; idx < dma->nregions; idx++) {
const dma_memory_region_t *const region = &dma->regions[idx];
- const dma_addr_t region_end = region->dma_addr + region->size;
+ vfu_dma_addr_t region_start = region->info.iova.iov_base;
+ vfu_dma_addr_t region_end = iov_end(&region->info.iova);
- while (dma_addr >= region->dma_addr && dma_addr < region_end) {
+ while (dma_addr >= region_start && dma_addr < region_end) {
size_t region_len = MIN(region_end - dma_addr, len);
if (cnt < max_sg) {
@@ -443,7 +442,10 @@ int dma_controller_dirty_page_logging_start(dma_controller_t *dma, size_t pgsize
for (i = 0; i < dma->nregions; i++) {
dma_memory_region_t *region = &dma->regions[i];
- ssize_t bitmap_size = get_bitmap_size(region->size, pgsize);
+ ssize_t bitmap_size;
+
+ bitmap_size = get_bitmap_size(region->info.iova.iov_len, pgsize);
+
if (bitmap_size < 0) {
return bitmap_size;
}
@@ -481,8 +483,8 @@ int dma_controller_dirty_page_logging_stop(dma_controller_t *dma)
}
int
-dma_controller_dirty_page_get(dma_controller_t *dma, dma_addr_t addr, int len,
- size_t pgsize, size_t size, char **data)
+dma_controller_dirty_page_get(dma_controller_t *dma, vfu_dma_addr_t addr,
+ int len, size_t pgsize, size_t size, char **data)
{
int ret;
ssize_t bitmap_size;
diff --git a/lib/dma.h b/lib/dma.h
index 0eb7ac3..226e758 100644
--- a/lib/dma.h
+++ b/lib/dma.h
@@ -51,8 +51,6 @@
* list (sglist) of dma_sg_t from DMA addresses. Then the sglist
* can be mapped using dma_map_sg() into the process's virtual address space
* as an iovec for direct access, and unmapped using dma_unmap_sg() when done.
- * - dma_map_addr() and dma_unmap_addr() helper functions are provided
- * for mapping DMA regions that can fit into one scatter-gather entry.
* Every region is mapped into the application's virtual address space
* at registration time with R/W permissions.
* dma_map_sg() ignores all protection bits and only does lookups and
@@ -79,18 +77,14 @@
#include "libvfio-user.h"
#include "common.h"
+#define iov_end(iov) ((iov)->iov_base + (iov)->iov_len)
+
struct vfu_ctx;
typedef struct {
- dma_addr_t dma_addr; // DMA address of this region
- uint32_t prot; // memory protection of the mapping
- // defined in sys/mman.h
-
- size_t size; // Size of this region
+ vfu_dma_info_t info;
int fd; // File descriptor to mmap
- int page_size; // Page size of this fd
off_t offset; // File offset
- void *virt_addr; // Virtual address of this region
int refcnt; // Number of users of this region
char *dirty_bitmap; // Dirty page bitmap
} dma_memory_region_t;
@@ -117,20 +111,20 @@ dma_controller_destroy(dma_controller_t *dma);
* (e.g. due to conflict with existing region).
*/
MOCK_DECLARE(int, dma_controller_add_region, dma_controller_t *dma,
- dma_addr_t dma_addr, size_t size, int fd, off_t offset,
+ vfu_dma_addr_t dma_addr, size_t size, int fd, off_t offset,
uint32_t prot);
-MOCK_DECLARE(void, _dma_controller_do_remove_region, dma_controller_t *dma,
- dma_memory_region_t *region);
-
MOCK_DECLARE(int, dma_controller_remove_region, dma_controller_t *dma,
- dma_addr_t dma_addr, size_t size, vfu_unmap_dma_cb_t *unmap_dma,
- void *data);
+ vfu_dma_addr_t dma_addr, size_t size,
+ vfu_dma_unregister_cb_t *dma_unregister, void *data);
+
+MOCK_DECLARE(void, dma_controller_unmap_region, dma_controller_t *dma,
+ dma_memory_region_t *region);
// Helper for dma_addr_to_sg() slow path.
int
_dma_addr_sg_split(const dma_controller_t *dma,
- dma_addr_t dma_addr, uint32_t len,
+ vfu_dma_addr_t dma_addr, uint32_t len,
dma_sg_t *sg, int max_sg, int prot);
static bool
@@ -142,9 +136,9 @@ _dma_should_mark_dirty(const dma_controller_t *dma, int prot)
}
static size_t
-_get_pgstart(size_t pgsize, uint64_t base_addr, uint64_t offset)
+_get_pgstart(size_t pgsize, void *base_addr, uint64_t offset)
{
- return (offset - base_addr) / pgsize;
+ return (offset - (uint64_t)base_addr) / pgsize;
}
static size_t
@@ -164,7 +158,7 @@ _dma_bitmap_get_pgrange(const dma_controller_t *dma,
assert(start != NULL);
assert(end != NULL);
- *start = _get_pgstart(dma->dirty_pgsize, region->dma_addr, sg->offset);
+ *start = _get_pgstart(dma->dirty_pgsize, region->info.iova.iov_base, sg->offset);
*end = _get_pgend(dma->dirty_pgsize, sg->length, *start);
}
@@ -186,24 +180,24 @@ _dma_mark_dirty(const dma_controller_t *dma, const dma_memory_region_t *region,
}
static inline int
-dma_init_sg(const dma_controller_t *dma, dma_sg_t *sg, dma_addr_t dma_addr,
+dma_init_sg(const dma_controller_t *dma, dma_sg_t *sg, vfu_dma_addr_t dma_addr,
uint32_t len, int prot, int region_index)
{
const dma_memory_region_t *const region = &dma->regions[region_index];
- if ((prot & PROT_WRITE) && !(region->prot & PROT_WRITE)) {
+ if ((prot & PROT_WRITE) && !(region->info.prot & PROT_WRITE)) {
errno = EACCES;
return -1;
}
- sg->dma_addr = region->dma_addr;
+ sg->dma_addr = region->info.iova.iov_base;
sg->region = region_index;
- sg->offset = dma_addr - region->dma_addr;
+ sg->offset = dma_addr - region->info.iova.iov_base;
sg->length = len;
if (_dma_should_mark_dirty(dma, prot)) {
_dma_mark_dirty(dma, region, sg);
}
- sg->mappable = region->virt_addr != NULL;
+ sg->mappable = (region->info.vaddr != NULL);
return 0;
}
@@ -223,18 +217,19 @@ dma_init_sg(const dma_controller_t *dma, dma_sg_t *sg, dma_addr_t dma_addr,
*/
static inline int
dma_addr_to_sg(const dma_controller_t *dma,
- dma_addr_t dma_addr, uint32_t len,
+ vfu_dma_addr_t dma_addr, size_t len,
dma_sg_t *sg, int max_sg, int prot)
{
static __thread int region_hint;
int cnt, ret;
const dma_memory_region_t *const region = &dma->regions[region_hint];
- const dma_addr_t region_end = region->dma_addr + region->size;
+ const void *region_end = iov_end(&region->info.iova);
// Fast path: single region.
if (likely(max_sg > 0 && len > 0 &&
- dma_addr >= region->dma_addr && dma_addr + len <= region_end &&
+ dma_addr >= region->info.iova.iov_base &&
+ dma_addr + len <= region_end &&
region_hint < dma->nregions)) {
ret = dma_init_sg(dma, sg, dma_addr, len, prot, region_hint);
if (ret < 0) {
@@ -251,12 +246,6 @@ dma_addr_to_sg(const dma_controller_t *dma,
return cnt;
}
-MOCK_DECLARE(void *, dma_map_region, dma_memory_region_t *region, int prot,
- size_t offset, size_t len);
-
-int
-dma_unmap_region(dma_memory_region_t *region, void *virt_addr, size_t len);
-
static inline int
dma_map_sg(dma_controller_t *dma, const dma_sg_t *sg, struct iovec *iov,
int cnt)
@@ -273,12 +262,15 @@ dma_map_sg(dma_controller_t *dma, const dma_sg_t *sg, struct iovec *iov,
return -EINVAL;
}
region = &dma->regions[sg[i].region];
- if (region->virt_addr == NULL) {
+
+ if (region->info.vaddr == NULL) {
return -EFAULT;
}
+
vfu_log(dma->vfu_ctx, LOG_DEBUG, "map %#lx-%#lx\n",
- sg->dma_addr + sg->offset, sg->dma_addr + sg->offset + sg->length);
- iov[i].iov_base = region->virt_addr + sg[i].offset;
+ sg->dma_addr + sg->offset,
+ sg->dma_addr + sg->offset + sg->length);
+ iov[i].iov_base = region->info.vaddr + sg[i].offset;
iov[i].iov_len = sg[i].length;
region->refcnt++;
}
@@ -299,7 +291,8 @@ dma_unmap_sg(dma_controller_t *dma, const dma_sg_t *sg,
* tfind(3)
*/
for (r = dma->regions;
- r < dma->regions + dma->nregions && r->dma_addr != sg[i].dma_addr;
+ r < dma->regions + dma->nregions &&
+ r->info.iova.iov_base != sg[i].dma_addr;
r++);
if (r > dma->regions + dma->nregions) {
/* bad region */
@@ -312,39 +305,6 @@ dma_unmap_sg(dma_controller_t *dma, const dma_sg_t *sg,
return;
}
-static inline void *
-dma_map_addr(dma_controller_t *dma, dma_addr_t dma_addr, uint32_t len, int prot)
-{
- dma_sg_t sg;
- struct iovec iov;
-
- if (dma_addr_to_sg(dma, dma_addr, len, &sg, 1, prot) == 1 &&
- dma_map_sg(dma, &sg, &iov, 1) == 0) {
- return iov.iov_base;
- }
-
- return NULL;
-}
-
-static inline void
-dma_unmap_addr(dma_controller_t *dma,
- dma_addr_t dma_addr, uint32_t len, void *addr)
-{
- dma_sg_t sg;
- struct iovec iov = {
- .iov_base = addr,
- .iov_len = len,
- };
- int r;
-
- r = dma_addr_to_sg(dma, dma_addr, len, &sg, 1, PROT_NONE);
- if (r != 1) {
- assert(false);
- }
-
- dma_unmap_sg(dma, &sg, &iov, 1);
-}
-
int
dma_controller_dirty_page_logging_start(dma_controller_t *dma, size_t pgsize);
@@ -352,12 +312,8 @@ int
dma_controller_dirty_page_logging_stop(dma_controller_t *dma);
int
-dma_controller_dirty_page_get(dma_controller_t *dma, dma_addr_t addr, int len,
- size_t pgsize, size_t size, char **data);
-
-bool
-dma_controller_region_valid(dma_controller_t *dma, dma_addr_t dma_addr,
- size_t size);
+dma_controller_dirty_page_get(dma_controller_t *dma, vfu_dma_addr_t addr,
+ int len, size_t pgsize, size_t size, char **data);
#endif /* LIB_VFIO_USER_DMA_H */
diff --git a/lib/irq.c b/lib/irq.c
index 3da1fb8..ecb6ce0 100644
--- a/lib/irq.c
+++ b/lib/irq.c
@@ -106,7 +106,7 @@ irqs_disable(vfu_ctx_t *vfu_ctx, uint32_t index, uint32_t start, uint32_t count)
count = vfu_ctx->irq_count[index];
}
- vfu_log(vfu_ctx, LOG_DEBUG, "disabling IRQ type %s range [%u-%u)",
+ vfu_log(vfu_ctx, LOG_DEBUG, "disabling IRQ type %s range [%u, %u)",
vfio_irq_idx_to_str(index), start, start + count);
switch (index) {
@@ -335,7 +335,7 @@ handle_device_set_irqs(vfu_ctx_t *vfu_ctx, uint32_t size,
irq_set->start, irq_set->count);
}
- vfu_log(vfu_ctx, LOG_DEBUG, "setting IRQ %s flags=%#x range [%u-%u)",
+ vfu_log(vfu_ctx, LOG_DEBUG, "setting IRQ %s flags=%#x range [%u, %u)",
vfio_irq_idx_to_str(irq_set->index), irq_set->flags,
irq_set->start, irq_set->start + irq_set->count);
diff --git a/lib/libvfio-user.c b/lib/libvfio-user.c
index 40a23b8..ebd70af 100644
--- a/lib/libvfio-user.c
+++ b/lib/libvfio-user.c
@@ -56,6 +56,7 @@
#include "private.h"
#include "tran_sock.h"
+
void
vfu_log(vfu_ctx_t *vfu_ctx, int level, const char *fmt, ...)
{
@@ -152,8 +153,8 @@ dev_get_caps(vfu_ctx_t *vfu_ctx, vfu_reg_info_t *vfu_reg, bool is_migr_reg,
for (i = 0; i < nr_mmap_areas; i++) {
struct iovec *iov = &vfu_reg->mmap_areas[i];
- vfu_log(vfu_ctx, LOG_DEBUG, "%s: area %d [%#llx-%#llx)", __func__,
- i, iov->iov_base, iov->iov_base + iov->iov_len);
+ vfu_log(vfu_ctx, LOG_DEBUG, "%s: area %d [%#llx, %#llx)", __func__,
+ i, iov->iov_base, iov_end(iov));
(*fds)[i] = vfu_reg->fd;
sparse->areas[i].offset = (uintptr_t)iov->iov_base;
@@ -487,7 +488,7 @@ handle_dma_map_or_unmap(vfu_ctx_t *vfu_ctx, uint32_t size, bool map,
size_t fdi;
assert(vfu_ctx != NULL);
- assert(fds != NULL); /* TODO assert valid only for map */
+ assert(nr_fds == 0 || fds != NULL);
if (vfu_ctx->dma == NULL) {
return 0;
@@ -504,61 +505,57 @@ handle_dma_map_or_unmap(vfu_ctx_t *vfu_ctx, uint32_t size, bool map,
}
for (i = 0, fdi = 0; i < nr_dma_regions; i++) {
+ struct vfio_user_dma_region *region = &dma_regions[i];
+ char rstr[1024];
+
+ snprintf(rstr, sizeof(rstr), "[%#lx, %#lx) offset=%#lx "
+ "prot=%#x flags=%#x", region->addr, region->addr + region->size,
+ region->offset, region->prot, region->flags);
+
+ vfu_log(vfu_ctx, LOG_DEBUG, "%s DMA region %s",
+ map ? "adding" : "removing", rstr);
+
if (map) {
int fd = -1;
- if (dma_regions[i].flags == VFIO_USER_F_DMA_REGION_MAPPABLE) {
+ if (region->flags == VFIO_USER_F_DMA_REGION_MAPPABLE) {
fd = consume_fd(fds, nr_fds, fdi++);
if (fd < 0) {
+ vfu_log(vfu_ctx, LOG_ERR, "failed to add DMA region %s: "
+ "mappable but fd not provided", rstr);
ret = fd;
break;
}
}
- ret = dma_controller_add_region(vfu_ctx->dma,
- dma_regions[i].addr,
- dma_regions[i].size,
- fd,
- dma_regions[i].offset,
- dma_regions[i].prot);
+ ret = dma_controller_add_region(vfu_ctx->dma, (void *)region->addr,
+ region->size, fd, region->offset,
+ region->prot);
if (ret < 0) {
if (fd != -1) {
close(fd);
}
- vfu_log(vfu_ctx, LOG_INFO,
- "failed to add DMA region %#lx-%#lx offset=%#lx fd=%d: %s",
- dma_regions[i].addr,
- dma_regions[i].addr + dma_regions[i].size - 1,
- dma_regions[i].offset, fd,
- strerror(-ret));
+ vfu_log(vfu_ctx, LOG_ERR, "failed to add DMA region %s: %s",
+ rstr, strerror(-ret));
break;
}
- if (vfu_ctx->map_dma != NULL) {
- vfu_ctx->map_dma(vfu_ctx, dma_regions[i].addr,
- dma_regions[i].size, dma_regions[i].prot);
+
+ if (vfu_ctx->dma_register != NULL) {
+ vfu_ctx->dma_register(vfu_ctx,
+ &vfu_ctx->dma->regions[ret].info);
}
+
ret = 0;
- vfu_log(vfu_ctx, LOG_DEBUG,
- "added DMA region %#lx-%#lx offset=%#lx fd=%d prot=%#x",
- dma_regions[i].addr,
- dma_regions[i].addr + dma_regions[i].size - 1,
- dma_regions[i].offset, fd, dma_regions[i].prot);
} else {
ret = dma_controller_remove_region(vfu_ctx->dma,
- dma_regions[i].addr,
- dma_regions[i].size,
- vfu_ctx->unmap_dma, vfu_ctx);
+ (void *)region->addr,
+ region->size,
+ vfu_ctx->dma_unregister,
+ vfu_ctx);
if (ret < 0) {
- vfu_log(vfu_ctx, LOG_INFO,
- "failed to remove DMA region %#lx-%#lx: %s",
- dma_regions[i].addr,
- dma_regions[i].addr + dma_regions[i].size - 1,
- strerror(-ret));
+ vfu_log(vfu_ctx, LOG_ERR, "failed to remove DMA region %s: %s",
+ rstr, strerror(-ret));
break;
}
- vfu_log(vfu_ctx, LOG_DEBUG,
- "removed DMA region %#lx-%#lx",
- dma_regions[i].addr,
- dma_regions[i].addr + dma_regions[i].size - 1);
}
}
return ret;
@@ -599,8 +596,10 @@ handle_dirty_pages_get(vfu_ctx_t *vfu_ctx,
for (i = 1; i < *nr_iovecs; i++) {
struct vfio_iommu_type1_dirty_bitmap_get *r = &ranges[(i - 1)]; /* FIXME ugly indexing */
- ret = dma_controller_dirty_page_get(vfu_ctx->dma, r->iova, r->size,
- r->bitmap.pgsize, r->bitmap.size,
+ ret = dma_controller_dirty_page_get(vfu_ctx->dma,
+ (vfu_dma_addr_t)r->iova,
+ r->size, r->bitmap.pgsize,
+ r->bitmap.size,
(char**)&((*iovecs)[i].iov_base));
if (ret != 0) {
goto out;
@@ -1327,7 +1326,7 @@ vfu_setup_region(vfu_ctx_t *vfu_ctx, int region_idx, size_t size,
for (i = 0; i < nr_mmap_areas; i++) {
struct iovec *iov = &mmap_areas[i];
- if ((size_t)iov->iov_base + iov->iov_len > size) {
+ if ((size_t)iov_end(iov) > size) {
return ERROR_INT(EINVAL);
}
}
@@ -1385,8 +1384,8 @@ vfu_setup_device_reset_cb(vfu_ctx_t *vfu_ctx, vfu_reset_cb_t *reset)
}
int
-vfu_setup_device_dma_cb(vfu_ctx_t *vfu_ctx, vfu_map_dma_cb_t *map_dma,
- vfu_unmap_dma_cb_t *unmap_dma)
+vfu_setup_device_dma(vfu_ctx_t *vfu_ctx, vfu_dma_register_cb_t *dma_register,
+ vfu_dma_unregister_cb_t *dma_unregister)
{
assert(vfu_ctx != NULL);
@@ -1397,8 +1396,8 @@ vfu_setup_device_dma_cb(vfu_ctx_t *vfu_ctx, vfu_map_dma_cb_t *map_dma,
return ERROR_INT(ENOMEM);
}
- vfu_ctx->map_dma = map_dma;
- vfu_ctx->unmap_dma = unmap_dma;
+ vfu_ctx->dma_register = dma_register;
+ vfu_ctx->dma_unregister = dma_unregister;
return 0;
}
@@ -1450,33 +1449,33 @@ vfu_setup_device_migration_callbacks(vfu_ctx_t *vfu_ctx,
return 0;
}
-inline vfu_reg_info_t *
+vfu_reg_info_t *
vfu_get_region_info(vfu_ctx_t *vfu_ctx)
{
assert(vfu_ctx != NULL);
return vfu_ctx->reg_info;
}
-inline int
-vfu_addr_to_sg(vfu_ctx_t *vfu_ctx, dma_addr_t dma_addr,
- uint32_t len, dma_sg_t *sg, int max_sg, int prot)
+int
+vfu_addr_to_sg(vfu_ctx_t *vfu_ctx, vfu_dma_addr_t dma_addr,
+ size_t len, dma_sg_t *sg, int max_sg, int prot)
{
assert(vfu_ctx != NULL);
- if (unlikely(vfu_ctx->unmap_dma == NULL)) {
+ if (unlikely(vfu_ctx->dma == NULL)) {
return ERROR_INT(EINVAL);
}
return dma_addr_to_sg(vfu_ctx->dma, dma_addr, len, sg, max_sg, prot);
}
-inline int
+int
vfu_map_sg(vfu_ctx_t *vfu_ctx, const dma_sg_t *sg,
struct iovec *iov, int cnt)
{
int ret;
- if (unlikely(vfu_ctx->unmap_dma == NULL)) {
+ if (unlikely(vfu_ctx->dma_unregister == NULL)) {
return ERROR_INT(EINVAL);
}
@@ -1488,10 +1487,10 @@ vfu_map_sg(vfu_ctx_t *vfu_ctx, const dma_sg_t *sg,
return 0;
}
-inline void
+void
vfu_unmap_sg(vfu_ctx_t *vfu_ctx, const dma_sg_t *sg, struct iovec *iov, int cnt)
{
- if (unlikely(vfu_ctx->unmap_dma == NULL)) {
+ if (unlikely(vfu_ctx->dma_unregister == NULL)) {
return;
}
return dma_unmap_sg(vfu_ctx->dma, sg, iov, cnt);
@@ -1515,7 +1514,7 @@ vfu_dma_read(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, void *data)
return ERROR_INT(ENOMEM);
}
- dma_send.addr = sg->dma_addr;
+ dma_send.addr = (uint64_t)sg->dma_addr;
dma_send.count = sg->length;
ret = vfu_ctx->tran->send_msg(vfu_ctx, msg_id, VFIO_USER_DMA_READ,
&dma_send, sizeof(dma_send), NULL,
@@ -1540,7 +1539,7 @@ vfu_dma_write(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, void *data)
if (dma_send == NULL) {
return ERROR_INT(ENOMEM);
}
- dma_send->addr = sg->dma_addr;
+ dma_send->addr = (uint64_t)sg->dma_addr;
dma_send->count = sg->length;
memcpy(dma_send->data, data, sg->length); /* FIXME no need to copy! */
ret = vfu_ctx->tran->send_msg(vfu_ctx, msg_id, VFIO_USER_DMA_WRITE,
diff --git a/lib/private.h b/lib/private.h
index c463fea..49f9152 100644
--- a/lib/private.h
+++ b/lib/private.h
@@ -122,8 +122,8 @@ struct vfu_ctx {
void *tran_data;
uint64_t flags;
char *uuid;
- vfu_map_dma_cb_t *map_dma;
- vfu_unmap_dma_cb_t *unmap_dma;
+ vfu_dma_register_cb_t *dma_register;
+ vfu_dma_unregister_cb_t *dma_unregister;
int client_max_fds;
@@ -139,26 +139,26 @@ struct vfu_ctx {
void
dump_buffer(const char *prefix, const char *buf, uint32_t count);
+int
+consume_fd(int *fds, size_t nr_fds, size_t index);
+
vfu_reg_info_t *
vfu_get_region_info(vfu_ctx_t *vfu_ctx);
+long
+dev_get_reginfo(vfu_ctx_t *vfu_ctx, uint32_t index, uint32_t argsz,
+ struct vfio_region_info **vfio_reg, int **fds, size_t *nr_fds);
+
int
handle_dma_map_or_unmap(vfu_ctx_t *vfu_ctx, uint32_t size, bool map,
int *fds, size_t nr_fds,
struct vfio_user_dma_region *dma_regions);
int
-consume_fd(int *fds, size_t nr_fds, size_t index);
-
-int
handle_device_get_info(vfu_ctx_t *vfu_ctx, uint32_t size,
struct vfio_device_info *in_dev_info,
struct vfio_device_info *out_dev_info);
-long
-dev_get_reginfo(vfu_ctx_t *vfu_ctx, uint32_t index, uint32_t argsz,
- struct vfio_region_info **vfio_reg, int **fds, size_t *nr_fds);
-
int
handle_device_set_irqs(vfu_ctx_t *vfu_ctx, uint32_t size,
int *fds, size_t nr_fds, struct vfio_irq_set *irq_set);
@@ -181,6 +181,24 @@ MOCK_DECLARE(int, handle_dirty_pages, vfu_ctx_t *vfu_ctx, uint32_t size,
struct iovec **iovecs, size_t *nr_iovecs,
struct vfio_iommu_type1_dirty_bitmap *dirty_bitmap);
+MOCK_DECLARE(bool, should_exec_command, vfu_ctx_t *vfu_ctx, uint16_t cmd);
+
+MOCK_DECLARE(bool, cmd_allowed_when_stopped_and_copying, uint16_t cmd);
+
+MOCK_DECLARE(int, get_next_command, vfu_ctx_t *vfu_ctx,
+ struct vfio_user_header *hdr, int *fds, size_t *nr_fds);
+
+MOCK_DECLARE(int, exec_command, vfu_ctx_t *vfu_ctx,
+ struct vfio_user_header *hdr, size_t size, int *fds, size_t nr_fds,
+ int **fds_out, size_t *nr_fds_out, struct iovec *_iovecs,
+ struct iovec **iovecs, size_t *nr_iovecs, bool *free_iovec_data);
+
+MOCK_DECLARE(int, process_request, vfu_ctx_t *vfu_ctx);
+
+MOCK_DECLARE(int, handle_dirty_pages, vfu_ctx_t *vfu_ctx, uint32_t size,
+ struct iovec **iovecs, size_t *nr_iovecs,
+ struct vfio_iommu_type1_dirty_bitmap *dirty_bitmap);
+
#endif /* LIB_VFIO_USER_PRIVATE_H */
/* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */