From e94bd44d10d8019ea2c39356363a5743136bdb5d Mon Sep 17 00:00:00 2001 From: John Levon Date: Fri, 27 Nov 2020 14:48:07 +0000 Subject: rename to libvfio-user (#128) The muser name no longer reflects the implementation, and will just serve to confuse. Bite the bullet now, and rename ourselves to reflect the actual implementation. Signed-off-by: John Levon Reviewed-by: Thanos Makatos Reviewed-by: Swapnil Ingle --- lib/CMakeLists.txt | 43 +- lib/cap.c | 111 ++-- lib/cap.h | 16 +- lib/common.h | 6 +- lib/dma.c | 33 +- lib/dma.h | 20 +- lib/irq.c | 168 ++--- lib/irq.h | 12 +- lib/libvfio-user.c | 1647 +++++++++++++++++++++++++++++++++++++++++++++++++ lib/migration.c | 101 +-- lib/migration.h | 18 +- lib/muser_ctx.c | 1643 ------------------------------------------------ lib/muser_pci.c | 333 ---------- lib/muser_priv.h | 217 ------- lib/pci.c | 332 ++++++++++ lib/private.h | 150 +++++ lib/python_bindings.c | 34 +- lib/setup.py | 4 +- lib/tran_sock.c | 186 +++--- lib/tran_sock.h | 69 +-- 20 files changed, 2548 insertions(+), 2595 deletions(-) create mode 100644 lib/libvfio-user.c delete mode 100644 lib/muser_ctx.c delete mode 100644 lib/muser_pci.c delete mode 100644 lib/muser_priv.h create mode 100644 lib/pci.c create mode 100644 lib/private.h diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 246e0ad..efc0a68 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -30,22 +30,22 @@ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC") -add_library(muser SHARED +add_library(vfio-user SHARED $<TARGET_OBJECTS:cap> $<TARGET_OBJECTS:dma> - $<TARGET_OBJECTS:muser_ctx> - $<TARGET_OBJECTS:muser_pci> - $<TARGET_OBJECTS:tran_sock> - $<TARGET_OBJECTS:migration> $<TARGET_OBJECTS:irq> + $<TARGET_OBJECTS:libvfio-user> + $<TARGET_OBJECTS:migration> + $<TARGET_OBJECTS:pci> + $<TARGET_OBJECTS:tran_sock> common.h irq.h - muser_priv.h + private.h tran_sock.h) -set_target_properties(muser PROPERTIES LINKER_LANGUAGE C) +set_target_properties(vfio-user PROPERTIES LINKER_LANGUAGE C) -target_link_libraries(muser json-c pthread) +target_link_libraries(vfio-user json-c pthread) set(UT_CFLAGS "-O0 -ggdb --coverage") set(UT_LFLAGS "--coverage") @@ -60,23 +60,22 @@ endfunction(add_library_ut) add_library_ut(cap cap.c) add_library_ut(dma dma.c) -add_library_ut(muser_ctx muser_ctx.c) -add_library_ut(muser_pci muser_pci.c) -add_library_ut(tran_sock tran_sock.c) -add_library_ut(migration migration.c) add_library_ut(irq irq.c) +add_library_ut(libvfio-user libvfio-user.c) +add_library_ut(migration migration.c) +add_library_ut(pci pci.c) +add_library_ut(tran_sock tran_sock.c) -install(TARGETS muser +install(TARGETS vfio-user LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}) if (DEFINED ENV{PYTHON_BINDINGS}) - add_custom_target(python_bindings_build ALL - COMMAND python setup.py build -b ${CMAKE_BINARY_DIR} - DEPENDS python_bindings.c setup.py - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/lib - SOURCES python_bindings.c setup.py) - # execute_process seems to only execute a single command, e.g. it can't - # handle two commands joined by && - install(CODE "execute_process(COMMAND ${CMAKE_SOURCE_DIR}/lib/python_bindings_install.sh ${CMAKE_SOURCE_DIR})") - + add_custom_target(python_bindings_build ALL + COMMAND python setup.py build -b ${CMAKE_BINARY_DIR} + DEPENDS python_bindings.c setup.py + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/lib + SOURCES python_bindings.c setup.py) + # execute_process seems to only execute a single command, e.g. 
it can't + # handle two commands joined by && + install(CODE "execute_process(COMMAND ${CMAKE_SOURCE_DIR}/lib/python_bindings_install.sh ${CMAKE_SOURCE_DIR})") endif() diff --git a/lib/cap.c b/lib/cap.c index 9ace154..0bb975a 100644 --- a/lib/cap.c +++ b/lib/cap.c @@ -37,7 +37,7 @@ #include #include -#include "muser.h" +#include "libvfio-user.h" #include "cap.h" struct cap { @@ -46,7 +46,7 @@ struct cap { }; struct caps { - struct cap caps[LM_MAX_CAPS]; /* FIXME only needs to be as big as nr_caps */ + struct cap caps[VFU_MAX_CAPS]; /* FIXME only needs to be as big as nr_caps */ unsigned int nr_caps; }; @@ -72,7 +72,7 @@ cap_is_accessed(struct cap *caps, int nr_caps, size_t count, loff_t offset) /* * FIXME write starts before capabilities but extends into them. I don't - * think that the while loop in lm_access will allow this in the first + * think that the while loop in vfu_access will allow this in the first * place. */ assert(false); @@ -97,7 +97,7 @@ cap_is_accessed(struct cap *caps, int nr_caps, size_t count, loff_t offset) * (offset + count). */ static uint8_t * -cap_find(lm_pci_config_space_t *config_space, struct caps *caps, loff_t offset, +cap_find(vfu_pci_config_space_t *config_space, struct caps *caps, loff_t offset, size_t count) { struct cap *cap; @@ -133,17 +133,17 @@ cap_is_valid(uint8_t id) } uint8_t * -cap_find_by_id(lm_ctx_t *lm_ctx, uint8_t id) +cap_find_by_id(vfu_ctx_t *vfu_ctx, uint8_t id) { uint8_t *pos; - lm_pci_config_space_t *config_space; + vfu_pci_config_space_t *config_space; if (!cap_is_valid(id)) { errno = EINVAL; return NULL; } - config_space = lm_get_pci_config_space(lm_ctx); + config_space = vfu_pci_get_config_space(vfu_ctx); if (config_space->hdr.cap == 0) { errno = ENOENT; @@ -160,7 +160,7 @@ cap_find_by_id(lm_ctx_t *lm_ctx, uint8_t id) } pos = config_space->raw + *(pos + PCI_CAP_LIST_NEXT); } - errno = ENOENT; + errno = ENOENT; return NULL; } @@ -173,32 +173,32 @@ cap_header_is_accessed(uint8_t cap_offset, loff_t offset) return offset - cap_offset <= 1; } -typedef ssize_t (cap_access) (lm_ctx_t *lm_ctx, uint8_t *cap, char *buf, +typedef ssize_t (cap_access) (vfu_ctx_t *vfu_ctx, uint8_t *cap, char *buf, size_t count, loff_t offset); static ssize_t -handle_pmcs_write(lm_ctx_t *lm_ctx, struct pmcap *pm, +handle_pmcs_write(vfu_ctx_t *vfu_ctx, struct pmcap *pm, const struct pmcs *const pmcs) { if (pm->pmcs.ps != pmcs->ps) { - lm_log(lm_ctx, LM_DBG, "power state set to %#x\n", pmcs->ps); + vfu_log(vfu_ctx, VFU_DBG, "power state set to %#x\n", pmcs->ps); } if (pm->pmcs.pmee != pmcs->pmee) { - lm_log(lm_ctx, LM_DBG, "PME enable set to %#x\n", pmcs->pmee); + vfu_log(vfu_ctx, VFU_DBG, "PME enable set to %#x\n", pmcs->pmee); } if (pm->pmcs.dse != pmcs->dse) { - lm_log(lm_ctx, LM_DBG, "data select set to %#x\n", pmcs->dse); + vfu_log(vfu_ctx, VFU_DBG, "data select set to %#x\n", pmcs->dse); } if (pm->pmcs.pmes != pmcs->pmes) { - lm_log(lm_ctx, LM_DBG, "PME status set to %#x\n", pmcs->pmes); + vfu_log(vfu_ctx, VFU_DBG, "PME status set to %#x\n", pmcs->pmes); } pm->pmcs = *pmcs; return 0; } static ssize_t -handle_pm_write(lm_ctx_t *lm_ctx, uint8_t *cap, char *const buf, +handle_pm_write(vfu_ctx_t *vfu_ctx, uint8_t *cap, char *const buf, const size_t count, const loff_t offset) { struct pmcap *pm = (struct pmcap *)cap; @@ -214,28 +214,29 @@ handle_pm_write(lm_ctx_t *lm_ctx, uint8_t *cap, char *const buf, if (count != sizeof(struct pmcs)) { return -EINVAL; } - return handle_pmcs_write(lm_ctx, pm, (struct pmcs *)buf); + return handle_pmcs_write(vfu_ctx, pm, (struct pmcs 
*)buf); } return -EINVAL; } static ssize_t -handle_mxc_write(lm_ctx_t *lm_ctx, struct msixcap *msix, +handle_mxc_write(vfu_ctx_t *vfu_ctx, struct msixcap *msix, const struct mxc *const mxc) { assert(msix != NULL); assert(mxc != NULL); if (mxc->mxe != msix->mxc.mxe) { - lm_log(lm_ctx, LM_DBG, "%s MSI-X\n", mxc->mxe ? "enable" : "disable"); + vfu_log(vfu_ctx, VFU_DBG, "%s MSI-X\n", + mxc->mxe ? "enable" : "disable"); msix->mxc.mxe = mxc->mxe; } if (mxc->fm != msix->mxc.fm) { if (mxc->fm) { - lm_log(lm_ctx, LM_DBG, "all MSI-X vectors masked\n"); + vfu_log(vfu_ctx, VFU_DBG, "all MSI-X vectors masked\n"); } else { - lm_log(lm_ctx, LM_DBG, + vfu_log(vfu_ctx, VFU_DBG, "vector's mask bit determines whether vector is masked\n"); } msix->mxc.fm = mxc->fm; @@ -245,7 +246,7 @@ handle_mxc_write(lm_ctx_t *lm_ctx, struct msixcap *msix, } static ssize_t -handle_msix_write(lm_ctx_t *lm_ctx, uint8_t *cap, char *const buf, +handle_msix_write(vfu_ctx_t *vfu_ctx, uint8_t *cap, char *const buf, const size_t count, const loff_t offset) { struct msixcap *msix = (struct msixcap *)cap; @@ -253,79 +254,83 @@ handle_msix_write(lm_ctx_t *lm_ctx, uint8_t *cap, char *const buf, if (count == sizeof(struct mxc)) { switch (offset) { case offsetof(struct msixcap, mxc): - return handle_mxc_write(lm_ctx, msix, (struct mxc *)buf); + return handle_mxc_write(vfu_ctx, msix, (struct mxc *)buf); default: - lm_log(lm_ctx, LM_ERR, "invalid MSI-X write offset %ld\n", offset); + vfu_log(vfu_ctx, VFU_ERR, + "invalid MSI-X write offset %ld\n", offset); return -EINVAL; } } - lm_log(lm_ctx, LM_ERR, "invalid MSI-X write size %lu\n", count); + vfu_log(vfu_ctx, VFU_ERR, "invalid MSI-X write size %lu\n", count); return -EINVAL; } static int -handle_px_pxdc_write(lm_ctx_t *lm_ctx, struct pxcap *px, const union pxdc *const p) +handle_px_pxdc_write(vfu_ctx_t *vfu_ctx, struct pxcap *px, + const union pxdc *const p) { assert(px != NULL); assert(p != NULL); if (p->cere != px->pxdc.cere) { px->pxdc.cere = p->cere; - lm_log(lm_ctx, LM_DBG, "CERE %s\n", p->cere ? "enable" : "disable"); + vfu_log(vfu_ctx, VFU_DBG, "CERE %s\n", p->cere ? "enable" : "disable"); } if (p->nfere != px->pxdc.nfere) { px->pxdc.nfere = p->nfere; - lm_log(lm_ctx, LM_DBG, "NFERE %s\n", p->nfere ? "enable" : "disable"); + vfu_log(vfu_ctx, VFU_DBG, "NFERE %s\n", + p->nfere ? "enable" : "disable"); } if (p->fere != px->pxdc.fere) { px->pxdc.fere = p->fere; - lm_log(lm_ctx, LM_DBG, "FERE %s\n", p->fere ? "enable" : "disable"); + vfu_log(vfu_ctx, VFU_DBG, "FERE %s\n", p->fere ? "enable" : "disable"); } if (p->urre != px->pxdc.urre) { px->pxdc.urre = p->urre; - lm_log(lm_ctx, LM_DBG, "URRE %s\n", p->urre ? "enable" : "disable"); + vfu_log(vfu_ctx, VFU_DBG, "URRE %s\n", p->urre ? "enable" : "disable"); } if (p->ero != px->pxdc.ero) { px->pxdc.ero = p->ero; - lm_log(lm_ctx, LM_DBG, "ERO %s\n", p->ero ? "enable" : "disable"); + vfu_log(vfu_ctx, VFU_DBG, "ERO %s\n", p->ero ? "enable" : "disable"); } if (p->mps != px->pxdc.mps) { px->pxdc.mps = p->mps; - lm_log(lm_ctx, LM_DBG, "MPS set to %d\n", p->mps); + vfu_log(vfu_ctx, VFU_DBG, "MPS set to %d\n", p->mps); } if (p->ete != px->pxdc.ete) { px->pxdc.ete = p->ete; - lm_log(lm_ctx, LM_DBG, "ETE %s\n", p->ete ? "enable" : "disable"); + vfu_log(vfu_ctx, VFU_DBG, "ETE %s\n", p->ete ? "enable" : "disable"); } if (p->pfe != px->pxdc.pfe) { px->pxdc.pfe = p->pfe; - lm_log(lm_ctx, LM_DBG, "PFE %s\n", p->pfe ? "enable" : "disable"); + vfu_log(vfu_ctx, VFU_DBG, "PFE %s\n", p->pfe ? 
"enable" : "disable"); } if (p->appme != px->pxdc.appme) { px->pxdc.appme = p->appme; - lm_log(lm_ctx, LM_DBG, "APPME %s\n", p->appme ? "enable" : "disable"); + vfu_log(vfu_ctx, VFU_DBG, "APPME %s\n", + p->appme ? "enable" : "disable"); } if (p->ens != px->pxdc.ens) { px->pxdc.ens = p->ens; - lm_log(lm_ctx, LM_DBG, "ENS %s\n", p->ens ? "enable" : "disable"); + vfu_log(vfu_ctx, VFU_DBG, "ENS %s\n", p->ens ? "enable" : "disable"); } if (p->mrrs != px->pxdc.mrrs) { px->pxdc.mrrs = p->mrrs; - lm_log(lm_ctx, LM_DBG, "MRRS set to %d\n", p->mrrs); + vfu_log(vfu_ctx, VFU_DBG, "MRRS set to %d\n", p->mrrs); } if (p->iflr) { - lm_log(lm_ctx, LM_DBG, + vfu_log(vfu_ctx, VFU_DBG, "initiate function level reset\n"); } @@ -333,18 +338,18 @@ handle_px_pxdc_write(lm_ctx_t *lm_ctx, struct pxcap *px, const union pxdc *const } static int -handle_px_write_2_bytes(lm_ctx_t *lm_ctx, struct pxcap *px, char *const buf, +handle_px_write_2_bytes(vfu_ctx_t *vfu_ctx, struct pxcap *px, char *const buf, loff_t off) { switch (off) { case offsetof(struct pxcap, pxdc): - return handle_px_pxdc_write(lm_ctx, px, (union pxdc *)buf); + return handle_px_pxdc_write(vfu_ctx, px, (union pxdc *)buf); } return -EINVAL; } static ssize_t -handle_px_write(lm_ctx_t *lm_ctx, uint8_t *cap, char *const buf, +handle_px_write(vfu_ctx_t *vfu_ctx, uint8_t *cap, char *const buf, size_t count, loff_t offset) { struct pxcap *px = (struct pxcap *)cap; @@ -352,7 +357,7 @@ handle_px_write(lm_ctx_t *lm_ctx, uint8_t *cap, char *const buf, int err = -EINVAL; switch (count) { case 2: - err = handle_px_write_2_bytes(lm_ctx, px, buf, offset); + err = handle_px_write_2_bytes(vfu_ctx, px, buf, offset); break; } if (err != 0) { @@ -373,10 +378,10 @@ static const struct cap_handler { }; ssize_t -cap_maybe_access(lm_ctx_t *lm_ctx, struct caps *caps, char *buf, size_t count, +cap_maybe_access(vfu_ctx_t *vfu_ctx, struct caps *caps, char *buf, size_t count, loff_t offset) { - lm_pci_config_space_t *config_space; + vfu_pci_config_space_t *config_space; uint8_t *cap; if (caps == NULL) { @@ -392,7 +397,7 @@ cap_maybe_access(lm_ctx_t *lm_ctx, struct caps *caps, char *buf, size_t count, } /* we're now guaranteed that the access is within some capability */ - config_space = lm_get_pci_config_space(lm_ctx); + config_space = vfu_pci_get_config_space(vfu_ctx); cap = cap_find(config_space, caps, offset, count); assert(cap != NULL); /* FIXME */ @@ -400,26 +405,26 @@ cap_maybe_access(lm_ctx_t *lm_ctx, struct caps *caps, char *buf, size_t count, /* FIXME how to deal with writes to capability header? 
*/ assert(false); } - return cap_handlers[cap[PCI_CAP_LIST_ID]].fn(lm_ctx, cap, buf, count, + return cap_handlers[cap[PCI_CAP_LIST_ID]].fn(vfu_ctx, cap, buf, count, offset - (loff_t)(cap - config_space->raw)); } -struct caps *caps_create(lm_ctx_t *lm_ctx, lm_cap_t **lm_caps, int nr_caps, - int *err) +struct caps * +caps_create(vfu_ctx_t *vfu_ctx, vfu_cap_t **vfu_caps, int nr_caps, int *err) { int i; uint8_t *prev; uint8_t next; - lm_pci_config_space_t *config_space; + vfu_pci_config_space_t *config_space; struct caps *caps = NULL; *err = 0; - if (nr_caps <= 0 || nr_caps >= LM_MAX_CAPS) { + if (nr_caps <= 0 || nr_caps >= VFU_MAX_CAPS) { *err = EINVAL; return NULL; } - assert(lm_caps != NULL); + assert(vfu_caps != NULL); caps = calloc(1, sizeof *caps); if (caps == NULL) { @@ -427,7 +432,7 @@ struct caps *caps_create(lm_ctx_t *lm_ctx, lm_cap_t **lm_caps, int nr_caps, goto err_out; } - config_space = lm_get_pci_config_space(lm_ctx); + config_space = vfu_pci_get_config_space(vfu_ctx); /* points to the next field of the previous capability */ prev = &config_space->hdr.cap; @@ -435,7 +440,7 @@ struct caps *caps_create(lm_ctx_t *lm_ctx, lm_cap_t **lm_caps, int nr_caps, next = PCI_STD_HEADER_SIZEOF; for (i = 0; i < nr_caps; i++) { - uint8_t *cap = (uint8_t*)lm_caps[i]; + uint8_t *cap = (uint8_t*)vfu_caps[i]; uint8_t id = cap[PCI_CAP_LIST_ID]; size_t size; @@ -460,7 +465,7 @@ struct caps *caps_create(lm_ctx_t *lm_ctx, lm_cap_t **lm_caps, int nr_caps, next += size; assert(next % 4 == 0); /* FIXME */ - lm_log(lm_ctx, LM_DBG, "initialized capability %s %#x-%#x\n", + vfu_log(vfu_ctx, VFU_DBG, "initialized capability %s %#x-%#x\n", cap_handlers[id].name, caps->caps[i].start, caps->caps[i].end); } caps->nr_caps = nr_caps; diff --git a/lib/cap.h b/lib/cap.h index 038562f..37c31d8 100644 --- a/lib/cap.h +++ b/lib/cap.h @@ -30,18 +30,18 @@ * */ -#ifndef LIB_MUSER_CAP_H -#define LIB_MUSER_CAP_H +#ifndef LIB_VFIO_USER_CAP_H +#define LIB_VFIO_USER_CAP_H -#include "muser.h" +#include "libvfio-user.h" struct caps; /** * Initializes PCI capabilities. */ -struct caps *caps_create(lm_ctx_t *lm_ctx, lm_cap_t **caps, int nr_caps, - int *err); +struct caps * +caps_create(vfu_ctx_t *vfu_ctx, vfu_cap_t **caps, int nr_caps, int *err); /* * Conditionally accesses the PCI capabilities. Returns: @@ -51,12 +51,12 @@ struct caps *caps_create(lm_ctx_t *lm_ctx, lm_cap_t **caps, int nr_caps, * <0: negative error code on error. */ ssize_t -cap_maybe_access(lm_ctx_t *lm_ctx, struct caps *caps, char *buf, size_t count, +cap_maybe_access(vfu_ctx_t *vfu_ctx, struct caps *caps, char *buf, size_t count, loff_t offset); uint8_t * -cap_find_by_id(lm_ctx_t *lm_ctx, uint8_t id); +cap_find_by_id(vfu_ctx_t *vfu_ctx, uint8_t id); -#endif /* LIB_MUSER_CAP_H */ +#endif /* LIB_VFIO_USER_CAP_H */ /* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lib/common.h b/lib/common.h index dbd3c9b..9a5189d 100644 --- a/lib/common.h +++ b/lib/common.h @@ -34,8 +34,8 @@ * Private utilities used by the library and sample/test code. 
*/ -#ifndef LIB_MUSER_COMMON_H -#define LIB_MUSER_COMMON_H +#ifndef LIB_VFIO_USER_COMMON_H +#define LIB_VFIO_USER_COMMON_H #include @@ -58,6 +58,6 @@ #define UNIT_TEST_SYMBOL(x) \ typeof(x) __wrap_##x __attribute__((weak, alias(#x))) -#endif /* LIB_MUSER_COMMON_H */ +#endif /* LIB_VFIO_USER_COMMON_H */ /* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lib/dma.c b/lib/dma.c index bcc6128..061106f 100644 --- a/lib/dma.c +++ b/lib/dma.c @@ -70,7 +70,7 @@ fds_are_same_file(int fd1, int fd2) } dma_controller_t * -dma_controller_create(lm_ctx_t *lm_ctx, int max_regions) +dma_controller_create(vfu_ctx_t *vfu_ctx, int max_regions) { dma_controller_t *dma; @@ -81,7 +81,7 @@ dma_controller_create(lm_ctx_t *lm_ctx, int max_regions) return dma; } - dma->lm_ctx = lm_ctx; + dma->vfu_ctx = vfu_ctx; dma->max_regions = max_regions; dma->nregions = 0; memset(dma->regions, 0, max_regions * sizeof(dma->regions[0])); @@ -101,13 +101,14 @@ _dma_controller_do_remove_region(dma_controller_t *dma, err = dma_unmap_region(region, region->virt_addr, region->size); if (err != 0) { - lm_log(dma->lm_ctx, LM_DBG, "failed to unmap fd=%d vaddr=%p-%p\n", + vfu_log(dma->vfu_ctx, VFU_DBG, "failed to unmap fd=%d vaddr=%p-%p\n", region->fd, region->virt_addr, region->virt_addr + region->size - 1); } if (region->fd != -1) { if (close(region->fd) == -1) { - lm_log(dma->lm_ctx, LM_DBG, "failed to close fd %d: %m\n", region->fd); + vfu_log(dma->vfu_ctx, VFU_DBG, + "failed to close fd %d: %m\n", region->fd); } } } @@ -145,7 +146,7 @@ dma_controller_region_valid(dma_controller_t *dma, dma_addr_t dma_addr, int dma_controller_remove_region(dma_controller_t *dma, dma_addr_t dma_addr, size_t size, - lm_unmap_dma_cb_t *unmap_dma, void *data) + vfu_unmap_dma_cb_t *unmap_dma, void *data) { int idx; dma_memory_region_t *region; @@ -159,7 +160,7 @@ dma_controller_remove_region(dma_controller_t *dma, if (region->refcnt > 0) { err = unmap_dma(data, region->dma_addr, region->size); if (err != 0) { - lm_log(dma->lm_ctx, LM_ERR, + vfu_log(dma->vfu_ctx, VFU_ERR, "failed to notify of removal of DMA region %#lx-%#lx: %s\n", region->dma_addr, region->dma_addr + region->size, strerror(-err)); @@ -193,7 +194,7 @@ dma_controller_remove_regions(dma_controller_t *dma) for (i = 0; i < dma->nregions; i++) { dma_memory_region_t *region = &dma->regions[i]; - lm_log(dma->lm_ctx, LM_INF, "unmap vaddr=%p IOVA=%lx", + vfu_log(dma->vfu_ctx, VFU_INF, "unmap vaddr=%p IOVA=%lx", region->virt_addr, region->dma_addr); _dma_controller_do_remove_region(dma, region); @@ -228,7 +229,7 @@ dma_controller_add_region(dma_controller_t *dma, /* First check if this is the same exact region. */ if (region->dma_addr == dma_addr && region->size == size) { if (offset != region->offset) { - lm_log(dma->lm_ctx, LM_ERR, + vfu_log(dma->vfu_ctx, VFU_ERR, "bad offset for new DMA region %#lx-%#lx, want=%ld, existing=%ld\n", dma_addr, dma_addr + size, offset, region->offset); goto err; @@ -240,7 +241,7 @@ dma_controller_add_region(dma_controller_t *dma, * the same file, however in the majority of cases we'll be * using a single fd. 
*/ - lm_log(dma->lm_ctx, LM_ERR, + vfu_log(dma->vfu_ctx, VFU_ERR, "bad fd=%d for new DMA region %#lx-%#lx, existing fd=%d\n", fd, offset, offset + size, region->fd); goto err; @@ -253,7 +254,7 @@ dma_controller_add_region(dma_controller_t *dma, dma_addr < region->dma_addr + region->size) || (region->dma_addr >= dma_addr && region->dma_addr < dma_addr + size)) { - lm_log(dma->lm_ctx, LM_INF, + vfu_log(dma->vfu_ctx, VFU_INF, "new DMA region %#lx+%#lx overlaps with DMA region %#lx-%#lx\n", dma_addr, size, region->dma_addr, region->size); goto err; @@ -262,7 +263,7 @@ dma_controller_add_region(dma_controller_t *dma, if (dma->nregions == dma->max_regions) { idx = dma->max_regions; - lm_log(dma->lm_ctx, LM_ERR, + vfu_log(dma->vfu_ctx, VFU_ERR, "reached maxed regions, recompile with higher number of DMA regions\n"); goto err; } @@ -273,7 +274,7 @@ dma_controller_add_region(dma_controller_t *dma, if (fd != -1) { page_size = fd_get_blocksize(fd); if (page_size < 0) { - lm_log(dma->lm_ctx, LM_ERR, "bad page size %d\n", page_size); + vfu_log(dma->vfu_ctx, VFU_ERR, "bad page size %d\n", page_size); goto err; } } @@ -290,13 +291,13 @@ dma_controller_add_region(dma_controller_t *dma, region->virt_addr = dma_map_region(region, PROT_READ | PROT_WRITE, 0, region->size); if (region->virt_addr == MAP_FAILED) { - lm_log(dma->lm_ctx, LM_ERR, + vfu_log(dma->vfu_ctx, VFU_ERR, "failed to memory map DMA region %#lx-%#lx: %s\n", dma_addr, dma_addr + size, strerror(errno)); if (region->fd != -1) { if (close(region->fd) == -1) { - lm_log(dma->lm_ctx, LM_DBG, "failed to close fd %d: %m\n", - region->fd); + vfu_log(dma->vfu_ctx, VFU_DBG, + "failed to close fd %d: %m\n", region->fd); } } goto err; @@ -514,7 +515,7 @@ dma_controller_dirty_page_get(dma_controller_t *dma, dma_addr_t addr, int len, if (size != (size_t)bitmap_size) { return -EINVAL; } - + region = &dma->regions[sg.region]; *data = region->dirty_bitmap; diff --git a/lib/dma.h b/lib/dma.h index 32140d8..dacf5bf 100644 --- a/lib/dma.h +++ b/lib/dma.h @@ -28,8 +28,8 @@ * */ -#ifndef LIB_MUSER_DMA_H -#define LIB_MUSER_DMA_H +#ifndef LIB_VFIO_USER_DMA_H +#define LIB_VFIO_USER_DMA_H /* * FIXME check whether DMA regions must be page aligned. If so then the @@ -75,10 +75,10 @@ #include #include -#include "muser.h" +#include "libvfio-user.h" #include "common.h" -struct lm_ctx; +struct vfu_ctx; typedef struct { dma_addr_t dma_addr; // DMA address of this region @@ -94,13 +94,13 @@ typedef struct { typedef struct { int max_regions; int nregions; - struct lm_ctx *lm_ctx; + struct vfu_ctx *vfu_ctx; size_t dirty_pgsize; // Dirty page granularity dma_memory_region_t regions[0]; } dma_controller_t; dma_controller_t * -dma_controller_create(lm_ctx_t *lm_ctx, int max_regions); +dma_controller_create(vfu_ctx_t *vfu_ctx, int max_regions); void dma_controller_destroy(dma_controller_t *dma); @@ -120,7 +120,7 @@ dma_controller_add_region(dma_controller_t *dma, int dma_controller_remove_region(dma_controller_t *dma, dma_addr_t dma_addr, size_t size, - lm_unmap_dma_cb_t *unmap_dma, void *data); + vfu_unmap_dma_cb_t *unmap_dma, void *data); // Helper for dma_addr_to_sg() slow path. 
int @@ -238,7 +238,7 @@ dma_map_sg(dma_controller_t *dma, const dma_sg_t *sg, struct iovec *iov, int i; for (i = 0; i < cnt; i++) { - lm_log(dma->lm_ctx, LM_DBG, "map %#lx-%#lx\n", + vfu_log(dma->vfu_ctx, VFU_DBG, "map %#lx-%#lx\n", sg->dma_addr + sg->offset, sg->dma_addr + sg->offset + sg->length); region = &dma->regions[sg[i].region]; iov[i].iov_base = region->virt_addr + sg[i].offset; @@ -271,7 +271,7 @@ dma_unmap_sg(dma_controller_t *dma, const dma_sg_t *sg, /* bad region */ continue; } - lm_log(dma->lm_ctx, LM_DBG, "unmap %#lx-%#lx\n", + vfu_log(dma->vfu_ctx, VFU_DBG, "unmap %#lx-%#lx\n", sg[i].dma_addr + sg[i].offset, sg[i].dma_addr + sg[i].offset + sg[i].length); r->refcnt--; } @@ -325,6 +325,6 @@ bool dma_controller_region_valid(dma_controller_t *dma, dma_addr_t dma_addr, size_t size); -#endif /* LIB_MUSER_DMA_H */ +#endif /* LIB_VFIO_USER_DMA_H */ /* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lib/irq.c b/lib/irq.c index b334745..ab174a1 100644 --- a/lib/irq.c +++ b/lib/irq.c @@ -47,48 +47,49 @@ vfio_irq_idx_to_str(int index) { [VFIO_PCI_MSIX_IRQ_INDEX] = "MSI-X", }; - assert(index < LM_DEV_NUM_IRQS); + assert(index < VFU_DEV_NUM_IRQS); return s[index]; } static long -irqs_disable(lm_ctx_t *lm_ctx, uint32_t index) +irqs_disable(vfu_ctx_t *vfu_ctx, uint32_t index) { int *irq_efd = NULL; uint32_t i; - assert(lm_ctx != NULL); - assert(index < LM_DEV_NUM_IRQS); + assert(vfu_ctx != NULL); + assert(index < VFU_DEV_NUM_IRQS); switch (index) { case VFIO_PCI_INTX_IRQ_INDEX: case VFIO_PCI_MSI_IRQ_INDEX: case VFIO_PCI_MSIX_IRQ_INDEX: - lm_log(lm_ctx, LM_DBG, "disabling IRQ %s", vfio_irq_idx_to_str(index)); - lm_ctx->irqs->type = IRQ_NONE; - for (i = 0; i < lm_ctx->irqs->max_ivs; i++) { - if (lm_ctx->irqs->efds[i] >= 0) { - if (close(lm_ctx->irqs->efds[i]) == -1) { - lm_log(lm_ctx, LM_DBG, "failed to close IRQ fd %d: %m", - lm_ctx->irqs->efds[i]); + vfu_log(vfu_ctx, VFU_DBG, "disabling IRQ %s", + vfio_irq_idx_to_str(index)); + vfu_ctx->irqs->type = IRQ_NONE; + for (i = 0; i < vfu_ctx->irqs->max_ivs; i++) { + if (vfu_ctx->irqs->efds[i] >= 0) { + if (close(vfu_ctx->irqs->efds[i]) == -1) { + vfu_log(vfu_ctx, VFU_DBG, "failed to close IRQ fd %d: %m", + vfu_ctx->irqs->efds[i]); } - lm_ctx->irqs->efds[i] = -1; + vfu_ctx->irqs->efds[i] = -1; } } return 0; case VFIO_PCI_ERR_IRQ_INDEX: - irq_efd = &lm_ctx->irqs->err_efd; + irq_efd = &vfu_ctx->irqs->err_efd; break; case VFIO_PCI_REQ_IRQ_INDEX: - irq_efd = &lm_ctx->irqs->req_efd; + irq_efd = &vfu_ctx->irqs->req_efd; break; } if (irq_efd != NULL) { if (*irq_efd != -1) { if (close(*irq_efd) == -1) { - lm_log(lm_ctx, LM_DBG, "failed to close IRQ fd %d: %m", + vfu_log(vfu_ctx, VFU_DBG, "failed to close IRQ fd %d: %m", *irq_efd); } *irq_efd = -1; @@ -96,12 +97,12 @@ irqs_disable(lm_ctx_t *lm_ctx, uint32_t index) return 0; } - lm_log(lm_ctx, LM_DBG, "failed to disable IRQs"); + vfu_log(vfu_ctx, VFU_DBG, "failed to disable IRQs"); return -EINVAL; } static int -irqs_set_data_none(lm_ctx_t *lm_ctx, struct vfio_irq_set *irq_set) +irqs_set_data_none(vfu_ctx_t *vfu_ctx, struct vfio_irq_set *irq_set) { int efd; __u32 i; @@ -109,12 +110,13 @@ irqs_set_data_none(lm_ctx_t *lm_ctx, struct vfio_irq_set *irq_set) eventfd_t val; for (i = irq_set->start; i < (irq_set->start + irq_set->count); i++) { - efd = lm_ctx->irqs->efds[i]; + efd = vfu_ctx->irqs->efds[i]; if (efd >= 0) { val = 1; ret = eventfd_write(efd, val); if (ret == -1) { - lm_log(lm_ctx, LM_DBG, "IRQ: failed to set data to none: %m"); + vfu_log(vfu_ctx, VFU_DBG, + "IRQ: failed to set 
data to none: %m"); return -errno; } } @@ -124,7 +126,7 @@ irqs_set_data_none(lm_ctx_t *lm_ctx, struct vfio_irq_set *irq_set) } static int -irqs_set_data_bool(lm_ctx_t *lm_ctx, struct vfio_irq_set *irq_set, void *data) +irqs_set_data_bool(vfu_ctx_t *vfu_ctx, struct vfio_irq_set *irq_set, void *data) { uint8_t *d8; int efd; @@ -135,12 +137,13 @@ irqs_set_data_bool(lm_ctx_t *lm_ctx, struct vfio_irq_set *irq_set, void *data) assert(data != NULL); for (i = irq_set->start, d8 = data; i < (irq_set->start + irq_set->count); i++, d8++) { - efd = lm_ctx->irqs->efds[i]; + efd = vfu_ctx->irqs->efds[i]; if (efd >= 0 && *d8 == 1) { val = 1; ret = eventfd_write(efd, val); if (ret == -1) { - lm_log(lm_ctx, LM_DBG, "IRQ: failed to set data to bool: %m"); + vfu_log(vfu_ctx, VFU_DBG, + "IRQ: failed to set data to bool: %m"); return -errno; } } @@ -150,7 +153,8 @@ irqs_set_data_bool(lm_ctx_t *lm_ctx, struct vfio_irq_set *irq_set, void *data) } static int -irqs_set_data_eventfd(lm_ctx_t *lm_ctx, struct vfio_irq_set *irq_set, void *data) +irqs_set_data_eventfd(vfu_ctx_t *vfu_ctx, struct vfio_irq_set *irq_set, + void *data) { int32_t *d32; int efd; @@ -159,47 +163,47 @@ irqs_set_data_eventfd(lm_ctx_t *lm_ctx, struct vfio_irq_set *irq_set, void *data assert(data != NULL); for (i = irq_set->start, d32 = data; i < (irq_set->start + irq_set->count); i++, d32++) { - efd = lm_ctx->irqs->efds[i]; + efd = vfu_ctx->irqs->efds[i]; if (efd >= 0) { if (close(efd) == -1) { - lm_log(lm_ctx, LM_DBG, "failed to close IRQ fd %d: %m", efd); + vfu_log(vfu_ctx, VFU_DBG, "failed to close IRQ fd %d: %m", efd); } - lm_ctx->irqs->efds[i] = -1; + vfu_ctx->irqs->efds[i] = -1; } if (*d32 >= 0) { - lm_ctx->irqs->efds[i] = *d32; + vfu_ctx->irqs->efds[i] = *d32; } - lm_log(lm_ctx, LM_DBG, "event fd[%d]=%d", i, lm_ctx->irqs->efds[i]); + vfu_log(vfu_ctx, VFU_DBG, "event fd[%d]=%d", i, vfu_ctx->irqs->efds[i]); } return 0; } static long -irqs_trigger(lm_ctx_t *lm_ctx, struct vfio_irq_set *irq_set, void *data) +irqs_trigger(vfu_ctx_t *vfu_ctx, struct vfio_irq_set *irq_set, void *data) { int err = 0; - assert(lm_ctx != NULL); + assert(vfu_ctx != NULL); assert(irq_set != NULL); if (irq_set->count == 0) { - return irqs_disable(lm_ctx, irq_set->index); + return irqs_disable(vfu_ctx, irq_set->index); } - lm_log(lm_ctx, LM_DBG, "setting IRQ %s flags=%#x", - vfio_irq_idx_to_str(irq_set->index), irq_set->flags); + vfu_log(vfu_ctx, VFU_DBG, "setting IRQ %s flags=%#x", + vfio_irq_idx_to_str(irq_set->index), irq_set->flags); switch (irq_set->flags & VFIO_IRQ_SET_DATA_TYPE_MASK) { case VFIO_IRQ_SET_DATA_NONE: - err = irqs_set_data_none(lm_ctx, irq_set); + err = irqs_set_data_none(vfu_ctx, irq_set); break; case VFIO_IRQ_SET_DATA_BOOL: - err = irqs_set_data_bool(lm_ctx, irq_set, data); + err = irqs_set_data_bool(vfu_ctx, irq_set, data); break; case VFIO_IRQ_SET_DATA_EVENTFD: - err = irqs_set_data_eventfd(lm_ctx, irq_set, data); + err = irqs_set_data_eventfd(vfu_ctx, irq_set, data); break; } @@ -207,11 +211,11 @@ irqs_trigger(lm_ctx_t *lm_ctx, struct vfio_irq_set *irq_set, void *data) } static long -dev_set_irqs_validate(lm_ctx_t *lm_ctx, struct vfio_irq_set *irq_set) +dev_set_irqs_validate(vfu_ctx_t *vfu_ctx, struct vfio_irq_set *irq_set) { uint32_t a_type, d_type; - assert(lm_ctx != NULL); + assert(vfu_ctx != NULL); assert(irq_set != NULL); // Separate action and data types from flags. 
@@ -219,8 +223,8 @@ dev_set_irqs_validate(lm_ctx_t *lm_ctx, struct vfio_irq_set *irq_set) d_type = (irq_set->flags & VFIO_IRQ_SET_DATA_TYPE_MASK); // Ensure index is within bounds. - if (irq_set->index >= LM_DEV_NUM_IRQS) { - lm_log(lm_ctx, LM_DBG, "bad IRQ index %d\n", irq_set->index); + if (irq_set->index >= VFU_DEV_NUM_IRQS) { + vfu_log(vfu_ctx, VFU_DBG, "bad IRQ index %d\n", irq_set->index); return -EINVAL; } @@ -230,39 +234,39 @@ dev_set_irqs_validate(lm_ctx_t *lm_ctx, struct vfio_irq_set *irq_set) if ((a_type != VFIO_IRQ_SET_ACTION_MASK) && (a_type != VFIO_IRQ_SET_ACTION_UNMASK) && (a_type != VFIO_IRQ_SET_ACTION_TRIGGER)) { - lm_log(lm_ctx, LM_DBG, "bad IRQ action mask %d\n", a_type); + vfu_log(vfu_ctx, VFU_DBG, "bad IRQ action mask %d\n", a_type); return -EINVAL; } // Only one of NONE/BOOL/EVENTFD is valid. if ((d_type != VFIO_IRQ_SET_DATA_NONE) && (d_type != VFIO_IRQ_SET_DATA_BOOL) && (d_type != VFIO_IRQ_SET_DATA_EVENTFD)) { - lm_log(lm_ctx, LM_DBG, "bad IRQ data %d\n", d_type); + vfu_log(vfu_ctx, VFU_DBG, "bad IRQ data %d\n", d_type); return -EINVAL; } // Ensure irq_set's start and count are within bounds. - if ((irq_set->start >= lm_ctx->irq_count[irq_set->index]) || - (irq_set->start + irq_set->count > lm_ctx->irq_count[irq_set->index])) { - lm_log(lm_ctx, LM_DBG, "bad IRQ start/count\n"); + if ((irq_set->start >= vfu_ctx->irq_count[irq_set->index]) || + (irq_set->start + irq_set->count > vfu_ctx->irq_count[irq_set->index])) { + vfu_log(vfu_ctx, VFU_DBG, "bad IRQ start/count\n"); return -EINVAL; } // Only TRIGGER is valid for ERR/REQ. if (((irq_set->index == VFIO_PCI_ERR_IRQ_INDEX) || (irq_set->index == VFIO_PCI_REQ_IRQ_INDEX)) && (a_type != VFIO_IRQ_SET_ACTION_TRIGGER)) { - lm_log(lm_ctx, LM_DBG, "bad IRQ trigger w/o ERR/REQ\n"); + vfu_log(vfu_ctx, VFU_DBG, "bad IRQ trigger w/o ERR/REQ\n"); return -EINVAL; } // count == 0 is only valid with ACTION_TRIGGER and DATA_NONE. if ((irq_set->count == 0) && ((a_type != VFIO_IRQ_SET_ACTION_TRIGGER) || (d_type != VFIO_IRQ_SET_DATA_NONE))) { - lm_log(lm_ctx, LM_DBG, "bad IRQ count %d\n", irq_set->count); + vfu_log(vfu_ctx, VFU_DBG, "bad IRQ count %d\n", irq_set->count); return -EINVAL; } // If IRQs are set, ensure index matches what's enabled for the device. - if ((irq_set->count != 0) && (lm_ctx->irqs->type != IRQ_NONE) && - (irq_set->index != LM2VFIO_IRQT(lm_ctx->irqs->type))) { - lm_log(lm_ctx, LM_DBG, "bad IRQ index\n"); + if ((irq_set->count != 0) && (vfu_ctx->irqs->type != IRQ_NONE) && + (irq_set->index != LM2VFIO_IRQT(vfu_ctx->irqs->type))) { + vfu_log(vfu_ctx, VFU_DBG, "bad IRQ index\n"); return -EINVAL; } @@ -270,15 +274,15 @@ dev_set_irqs_validate(lm_ctx_t *lm_ctx, struct vfio_irq_set *irq_set) } static long -dev_set_irqs(lm_ctx_t *lm_ctx, struct vfio_irq_set *irq_set, void *data) +dev_set_irqs(vfu_ctx_t *vfu_ctx, struct vfio_irq_set *irq_set, void *data) { long ret; - assert(lm_ctx != NULL); + assert(vfu_ctx != NULL); assert(irq_set != NULL); // Ensure irq_set is valid. 
- ret = dev_set_irqs_validate(lm_ctx, irq_set); + ret = dev_set_irqs_validate(vfu_ctx, irq_set); if (ret != 0) { return ret; } @@ -290,37 +294,37 @@ dev_set_irqs(lm_ctx_t *lm_ctx, struct vfio_irq_set *irq_set, void *data) return 0; } - return irqs_trigger(lm_ctx, irq_set, data); + return irqs_trigger(vfu_ctx, irq_set, data); } static long -dev_get_irqinfo(lm_ctx_t *lm_ctx, struct vfio_irq_info *irq_info_in, +dev_get_irqinfo(vfu_ctx_t *vfu_ctx, struct vfio_irq_info *irq_info_in, struct vfio_irq_info *irq_info_out) { - assert(lm_ctx != NULL); + assert(vfu_ctx != NULL); assert(irq_info_in != NULL); assert(irq_info_out != NULL); // Ensure provided argsz is sufficiently big and index is within bounds. if ((irq_info_in->argsz < sizeof(struct vfio_irq_info)) || - (irq_info_in->index >= LM_DEV_NUM_IRQS)) { - lm_log(lm_ctx, LM_DBG, "bad irq_info (size=%d index=%d)\n", - irq_info_in->argsz, irq_info_in->index); + (irq_info_in->index >= VFU_DEV_NUM_IRQS)) { + vfu_log(vfu_ctx, VFU_DBG, "bad irq_info (size=%d index=%d)\n", + irq_info_in->argsz, irq_info_in->index); return -EINVAL; } - irq_info_out->count = lm_ctx->irq_count[irq_info_in->index]; + irq_info_out->count = vfu_ctx->irq_count[irq_info_in->index]; irq_info_out->flags = VFIO_IRQ_INFO_EVENTFD; return 0; } int -handle_device_get_irq_info(lm_ctx_t *lm_ctx, uint32_t size, +handle_device_get_irq_info(vfu_ctx_t *vfu_ctx, uint32_t size, struct vfio_irq_info *irq_info_in, struct vfio_irq_info *irq_info_out) { - assert(lm_ctx != NULL); + assert(vfu_ctx != NULL); assert(irq_info_in != NULL); assert(irq_info_out != NULL); @@ -328,16 +332,16 @@ handle_device_get_irq_info(lm_ctx_t *lm_ctx, uint32_t size, return -EINVAL; } - return dev_get_irqinfo(lm_ctx, irq_info_in, irq_info_out); + return dev_get_irqinfo(vfu_ctx, irq_info_in, irq_info_out); } int -handle_device_set_irqs(lm_ctx_t *lm_ctx, uint32_t size, +handle_device_set_irqs(vfu_ctx_t *vfu_ctx, uint32_t size, int *fds, int nr_fds, struct vfio_irq_set *irq_set) { void *data = NULL; - assert(lm_ctx != NULL); + assert(vfu_ctx != NULL); assert(irq_set != NULL); if (size < sizeof *irq_set || size != irq_set->argsz) { @@ -359,19 +363,19 @@ handle_device_set_irqs(lm_ctx_t *lm_ctx, uint32_t size, return -EINVAL; } - return dev_set_irqs(lm_ctx, irq_set, data); + return dev_set_irqs(vfu_ctx, irq_set, data); } -static int validate_irq_subindex(lm_ctx_t *lm_ctx, uint32_t subindex) +static int validate_irq_subindex(vfu_ctx_t *vfu_ctx, uint32_t subindex) { - if (lm_ctx == NULL) { + if (vfu_ctx == NULL) { errno = EINVAL; return -1; } - if ((subindex >= lm_ctx->irqs->max_ivs)) { - lm_log(lm_ctx, LM_ERR, "bad IRQ %d, max=%d\n", subindex, - lm_ctx->irqs->max_ivs); + if ((subindex >= vfu_ctx->irqs->max_ivs)) { + vfu_log(vfu_ctx, VFU_ERR, "bad IRQ %d, max=%d\n", subindex, + vfu_ctx->irqs->max_ivs); /* FIXME should return -errno */ errno = EINVAL; return -1; @@ -381,42 +385,42 @@ static int validate_irq_subindex(lm_ctx_t *lm_ctx, uint32_t subindex) } int -lm_irq_trigger(lm_ctx_t *lm_ctx, uint32_t subindex) +vfu_irq_trigger(vfu_ctx_t *vfu_ctx, uint32_t subindex) { int ret; eventfd_t val = 1; - ret = validate_irq_subindex(lm_ctx, subindex); + ret = validate_irq_subindex(vfu_ctx, subindex); if (ret < 0) { return ret; } - if (lm_ctx->irqs->efds[subindex] == -1) { - lm_log(lm_ctx, LM_ERR, "no fd for interrupt %d\n", subindex); + if (vfu_ctx->irqs->efds[subindex] == -1) { + vfu_log(vfu_ctx, VFU_ERR, "no fd for interrupt %d\n", subindex); /* FIXME should return -errno */ errno = ENOENT; return -1; } - return 
eventfd_write(lm_ctx->irqs->efds[subindex], val); + return eventfd_write(vfu_ctx->irqs->efds[subindex], val); } int -lm_irq_message(lm_ctx_t *lm_ctx, uint32_t subindex) +vfu_irq_message(vfu_ctx_t *vfu_ctx, uint32_t subindex) { int ret, msg_id = 1; struct vfio_user_irq_info irq_info; - ret = validate_irq_subindex(lm_ctx, subindex); + ret = validate_irq_subindex(vfu_ctx, subindex); if (ret < 0) { return -1; } irq_info.subindex = subindex; - ret = vfio_user_msg(lm_ctx->conn_fd, msg_id, - VFIO_USER_VM_INTERRUPT, - &irq_info, sizeof irq_info, - NULL, NULL, 0); + ret = vfu_msg(vfu_ctx->conn_fd, msg_id, + VFIO_USER_VM_INTERRUPT, + &irq_info, sizeof irq_info, + NULL, NULL, 0); if (ret < 0) { /* FIXME should return -errno */ errno = -ret; diff --git a/lib/irq.h b/lib/irq.h index d29e224..5f29e9e 100644 --- a/lib/irq.h +++ b/lib/irq.h @@ -30,19 +30,19 @@ * */ -#ifndef LIB_MUSER_IRQ_H -#define LIB_MUSER_IRQ_H +#ifndef LIB_VFIO_USER_IRQ_H +#define LIB_VFIO_USER_IRQ_H -#include "muser_priv.h" +#include "private.h" int -handle_device_get_irq_info(lm_ctx_t *lm_ctx, uint32_t size, +handle_device_get_irq_info(vfu_ctx_t *vfu_ctx, uint32_t size, struct vfio_irq_info *irq_info_in, struct vfio_irq_info *irq_info_out); int -handle_device_set_irqs(lm_ctx_t *lm_ctx, uint32_t size, +handle_device_set_irqs(vfu_ctx_t *vfu_ctx, uint32_t size, int *fds, int nr_fds, struct vfio_irq_set *irq_set); -#endif /* LIB_MUSER_IRQ_H */ +#endif /* LIB_VFIO_USER_IRQ_H */ /* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lib/libvfio-user.c b/lib/libvfio-user.c new file mode 100644 index 0000000..2676362 --- /dev/null +++ b/lib/libvfio-user.c @@ -0,0 +1,1647 @@ +/* + * Copyright (c) 2019 Nutanix Inc. All rights reserved. + * + * Authors: Thanos Makatos + * Swapnil Ingle + * Felipe Franciosi + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Nutanix nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. 
+ * + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cap.h" +#include "dma.h" +#include "libvfio-user.h" +#include "private.h" +#include "tran_sock.h" +#include "migration.h" +#include "irq.h" + + +void +vfu_log(vfu_ctx_t *vfu_ctx, vfu_log_lvl_t lvl, const char *fmt, ...) +{ + va_list ap; + char buf[BUFSIZ]; + int _errno = errno; + + assert(vfu_ctx != NULL); + + if (vfu_ctx->log == NULL || lvl > vfu_ctx->log_lvl || fmt == NULL) { + return; + } + + va_start(ap, fmt); + vsnprintf(buf, sizeof buf, fmt, ap); + va_end(ap); + vfu_ctx->log(vfu_ctx->pvt, lvl, buf); + errno = _errno; +} + +static inline int ERROR(int err) +{ + errno = err; + return -1; +} + +static size_t +get_vfio_caps_size(bool is_migr_reg, struct vfu_sparse_mmap_areas *m) +{ + size_t type_size = 0; + size_t sparse_size = 0; + + if (is_migr_reg) { + type_size = sizeof(struct vfio_region_info_cap_type); + } + + if (m != NULL) { + sparse_size = sizeof(struct vfio_region_info_cap_sparse_mmap) + + (m->nr_mmap_areas * sizeof(struct vfio_region_sparse_mmap_area)); + } + + return type_size + sparse_size; +} + +/* + * Populate the sparse mmap capability information to vfio-client. + * Sparse mmap information stays after struct vfio_region_info and cap_offest + * points accordingly. + */ +static void +dev_get_caps(vfu_ctx_t *vfu_ctx, vfu_reg_info_t *vfu_reg, bool is_migr_reg, + struct vfio_region_info *vfio_reg) +{ + struct vfio_info_cap_header *header; + struct vfio_region_info_cap_type *type = NULL; + struct vfio_region_info_cap_sparse_mmap *sparse = NULL; + struct vfu_sparse_mmap_areas *mmap_areas; + + assert(vfu_ctx != NULL); + assert(vfio_reg != NULL); + + header = (struct vfio_info_cap_header*)(vfio_reg + 1); + + if (is_migr_reg) { + type = (struct vfio_region_info_cap_type*)header; + type->header.id = VFIO_REGION_INFO_CAP_TYPE; + type->header.version = 1; + type->header.next = 0; + type->type = VFIO_REGION_TYPE_MIGRATION; + type->subtype = VFIO_REGION_SUBTYPE_MIGRATION; + vfio_reg->cap_offset = sizeof(struct vfio_region_info); + } + + if (vfu_reg->mmap_areas != NULL) { + int i, nr_mmap_areas = vfu_reg->mmap_areas->nr_mmap_areas; + if (type != NULL) { + type->header.next = vfio_reg->cap_offset + sizeof(struct vfio_region_info_cap_type); + sparse = (struct vfio_region_info_cap_sparse_mmap*)(type + 1); + } else { + vfio_reg->cap_offset = sizeof(struct vfio_region_info); + sparse = (struct vfio_region_info_cap_sparse_mmap*)header; + } + sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP; + sparse->header.version = 1; + sparse->header.next = 0; + sparse->nr_areas = nr_mmap_areas; + + mmap_areas = vfu_reg->mmap_areas; + for (i = 0; i < nr_mmap_areas; i++) { + sparse->areas[i].offset = mmap_areas->areas[i].start; + sparse->areas[i].size = mmap_areas->areas[i].size; + vfu_log(vfu_ctx, VFU_DBG, "%s: area %d %#llx-%#llx", __func__, + i, sparse->areas[i].offset, + sparse->areas[i].offset + sparse->areas[i].size); + } + } + + /* + * FIXME VFIO_REGION_INFO_FLAG_MMAP is valid if the region is + * memory-mappable in general, not only if it supports sparse mmap. 
+ */ + vfio_reg->flags |= VFIO_REGION_INFO_FLAG_MMAP | VFIO_REGION_INFO_FLAG_CAPS; +} + +#define VFU_REGION_SHIFT 40 +#define VFU_REGION_MASK ((1ULL << VFU_REGION_SHIFT) - 1) + +uint64_t +region_to_offset(uint32_t region) +{ + return (uint64_t)region << VFU_REGION_SHIFT; +} + +uint32_t +offset_to_region(uint64_t offset) +{ + return (offset >> VFU_REGION_SHIFT) & VFU_REGION_MASK; +} + +#ifdef VFU_VERBOSE_LOGGING +void +dump_buffer(const char *prefix, const char *buf, uint32_t count) +{ + int i; + const size_t bytes_per_line = 0x8; + + if (strcmp(prefix, "")) { + fprintf(stderr, "%s\n", prefix); + } + for (i = 0; i < (int)count; i++) { + if (i % bytes_per_line != 0) { + fprintf(stderr, " "); + } + /* TODO valgrind emits a warning if count is 1 */ + fprintf(stderr,"0x%02x", *(buf + i)); + if ((i + 1) % bytes_per_line == 0) { + fprintf(stderr, "\n"); + } + } + if (i % bytes_per_line != 0) { + fprintf(stderr, "\n"); + } +} +#else +#define dump_buffer(prefix, buf, count) +#endif + +static bool +is_migr_reg(vfu_ctx_t *vfu_ctx, int index) +{ + return &vfu_ctx->reg_info[index] == vfu_ctx->migr_reg; +} + +static long +dev_get_reginfo(vfu_ctx_t *vfu_ctx, uint32_t index, + struct vfio_region_info **vfio_reg) +{ + vfu_reg_info_t *vfu_reg; + size_t caps_size; + uint32_t argsz; + + assert(vfu_ctx != NULL); + assert(vfio_reg != NULL); + + vfu_reg = &vfu_ctx->reg_info[index]; + + if (index >= vfu_ctx->nr_regions) { + vfu_log(vfu_ctx, VFU_DBG, "bad region index %d", index); + return -EINVAL; + } + + caps_size = get_vfio_caps_size(is_migr_reg(vfu_ctx, index), + vfu_reg->mmap_areas); + argsz = caps_size + sizeof(struct vfio_region_info); + *vfio_reg = calloc(1, argsz); + if (!*vfio_reg) { + return -ENOMEM; + } + /* FIXME document in the protocol that vfio_req->argsz is ignored */ + (*vfio_reg)->argsz = argsz; + (*vfio_reg)->flags = vfu_reg->flags; + (*vfio_reg)->index = index; + (*vfio_reg)->offset = region_to_offset((*vfio_reg)->index); + (*vfio_reg)->size = vfu_reg->size; + + if (caps_size > 0) { + dev_get_caps(vfu_ctx, vfu_reg, is_migr_reg(vfu_ctx, index), *vfio_reg); + } + + vfu_log(vfu_ctx, VFU_DBG, "region_info[%d] offset %#llx flags %#x size %llu " + "argsz %u", + (*vfio_reg)->index, (*vfio_reg)->offset, (*vfio_reg)->flags, + (*vfio_reg)->size, (*vfio_reg)->argsz); + + return 0; +} + +int +vfu_get_region(loff_t pos, size_t count, loff_t *off) +{ + int r; + + assert(off != NULL); + + r = offset_to_region(pos); + if ((int)offset_to_region(pos + count) != r) { + return -ENOENT; + } + *off = pos - region_to_offset(r); + + return r; +} + +static uint32_t +region_size(vfu_ctx_t *vfu_ctx, int region) +{ + assert(region >= VFU_PCI_DEV_BAR0_REGION_IDX && region <= VFU_PCI_DEV_VGA_REGION_IDX); + return vfu_ctx->reg_info[region].size; +} + +static uint32_t +pci_config_space_size(vfu_ctx_t *vfu_ctx) +{ + return region_size(vfu_ctx, VFU_PCI_DEV_CFG_REGION_IDX); +} + +static ssize_t +handle_pci_config_space_access(vfu_ctx_t *vfu_ctx, char *buf, size_t count, + loff_t pos, bool is_write) +{ + int ret; + + count = MIN(pci_config_space_size(vfu_ctx), count); + if (is_write) { + ret = cap_maybe_access(vfu_ctx, vfu_ctx->caps, buf, count, pos); + if (ret < 0) { + vfu_log(vfu_ctx, VFU_ERR, "bad access to capabilities %#lx-%#lx\n", + pos, pos + count); + return ret; + } + } else { + memcpy(buf, vfu_ctx->pci_config_space->raw + pos, count); + } + return count; +} + +static ssize_t +do_access(vfu_ctx_t *vfu_ctx, char *buf, uint8_t count, uint64_t pos, bool is_write) +{ + int idx; + loff_t offset; + + assert(vfu_ctx != NULL); 
+ assert(buf != NULL); + assert(count == 1 || count == 2 || count == 4 || count == 8); + + idx = vfu_get_region(pos, count, &offset); + if (idx < 0) { + vfu_log(vfu_ctx, VFU_ERR, "invalid region %d", idx); + return idx; + } + + if (idx < 0 || idx >= (int)vfu_ctx->nr_regions) { + vfu_log(vfu_ctx, VFU_ERR, "bad region %d", idx); + return -EINVAL; + } + + if (idx == VFU_PCI_DEV_CFG_REGION_IDX) { + return handle_pci_config_space_access(vfu_ctx, buf, count, offset, + is_write); + } + + if (is_migr_reg(vfu_ctx, idx)) { + if (offset + count > vfu_ctx->reg_info[idx].size) { + vfu_log(vfu_ctx, VFU_ERR, "read %#lx-%#lx past end of migration region (%#x)", + offset, offset + count - 1, + vfu_ctx->reg_info[idx].size); + return -EINVAL; + } + return handle_migration_region_access(vfu_ctx, vfu_ctx->pvt, + vfu_ctx->migration, + buf, count, offset, is_write); + } + + /* + * Checking whether a callback exists might sound expensive however this + * code is not performance critical. This works well when we don't expect a + * region to be used, so the user of the library can simply leave the + * callback NULL in vfu_create_ctx. + */ + if (vfu_ctx->reg_info[idx].fn != NULL) { + return vfu_ctx->reg_info[idx].fn(vfu_ctx->pvt, buf, count, offset, + is_write); + } + + vfu_log(vfu_ctx, VFU_ERR, "no callback for region %d", idx); + + return -EINVAL; +} + +/* + * Returns the number of bytes processed on success or a negative number on + * error. + * + * TODO function naming, general cleanup of access path + * FIXME we must be able to return values up to uint32_t bit, or negative on + * error. Better to make return value an int and return the number of bytes + * processed via an argument. + */ +static ssize_t +_vfu_access(vfu_ctx_t *vfu_ctx, char *buf, uint32_t count, uint64_t *ppos, + bool is_write) +{ + uint32_t done = 0; + int ret; + + assert(vfu_ctx != NULL); + /* buf and ppos can be NULL if count is 0 */ + + while (count) { + size_t size; + /* + * Limit accesses to qword and enforce alignment. Figure out whether + * the PCI spec requires this + * FIXME while this makes sense for registers, we might be able to relax + * this requirement and make some transfers more efficient. Maybe make + * this a per-region option that can be set by the user? + */ + if (count >= 8 && !(*ppos % 8)) { + size = 8; + } else if (count >= 4 && !(*ppos % 4)) { + size = 4; + } else if (count >= 2 && !(*ppos % 2)) { + size = 2; + } else { + size = 1; + } + ret = do_access(vfu_ctx, buf, size, *ppos, is_write); + if (ret <= 0) { + vfu_log(vfu_ctx, VFU_ERR, "failed to %s %#lx-%#lx: %s", + is_write ? "write to" : "read from", *ppos, *ppos + size - 1, + strerror(-ret)); + /* + * TODO if ret < 0 then it might contain a legitimate error code, why replace it with EFAULT? + */ + return -EFAULT; + } + if (ret != (int)size) { + vfu_log(vfu_ctx, VFU_DBG, "bad read %d != %ld", ret, size); + } + count -= size; + done += size; + *ppos += size; + buf += size; + } + return done; +} + +static inline int +vfu_access(vfu_ctx_t *vfu_ctx, bool is_write, char *rwbuf, uint32_t count, + uint64_t *pos) +{ + uint32_t processed = 0, _count; + int ret; + + assert(vfu_ctx != NULL); + assert(rwbuf != NULL); + assert(pos != NULL); + + vfu_log(vfu_ctx, VFU_DBG, "%s %#lx-%#lx", is_write ? "W" : "R", *pos, + *pos + count - 1); + +#ifdef VFU_VERBOSE_LOGGING + if (is_write) { + dump_buffer("buffer write", rwbuf, count); + } +#endif + + _count = count; + ret = vfu_pci_hdr_access(vfu_ctx, &_count, pos, is_write, rwbuf); + if (ret != 0) { + /* FIXME shouldn't we fail here? 
*/ + vfu_log(vfu_ctx, VFU_ERR, "failed to access PCI header: %s", + strerror(-ret)); +#ifdef VFU_VERBOSE_LOGGING + dump_buffer("buffer write", rwbuf, _count); +#endif + } + + /* + * count is how much has been processed by vfu_pci_hdr_access, + * _count is how much there's left to be processed by vfu_access + */ + processed = count - _count; + ret = _vfu_access(vfu_ctx, rwbuf + processed, _count, pos, is_write); + if (ret >= 0) { + ret += processed; +#ifdef VFU_VERBOSE_LOGGING + if (!is_write && err == ret) { + dump_buffer("buffer read", rwbuf, ret); + } +#endif + } + + return ret; +} + +/* TODO merge with dev_get_reginfo */ +static int +handle_device_get_region_info(vfu_ctx_t *vfu_ctx, uint32_t size, + struct vfio_region_info *reg_info_in, + struct vfio_region_info **reg_info_out) +{ + if (size != sizeof(*reg_info_in) || size != reg_info_in->argsz) { + return -EINVAL; + } + + return dev_get_reginfo(vfu_ctx, reg_info_in->index, reg_info_out); +} + +static int +handle_device_get_info(vfu_ctx_t *vfu_ctx, uint32_t size, + struct vfio_device_info *dev_info) +{ + assert(vfu_ctx != NULL); + assert(dev_info != NULL); + + if (size != sizeof *dev_info) { + return -EINVAL; + } + + dev_info->argsz = sizeof *dev_info; + dev_info->flags = VFIO_DEVICE_FLAGS_PCI | VFIO_DEVICE_FLAGS_RESET; + dev_info->num_regions = vfu_ctx->nr_regions; + dev_info->num_irqs = VFU_DEV_NUM_IRQS; + + vfu_log(vfu_ctx, VFU_DBG, "sent devinfo flags %#x, num_regions %d, num_irqs" + " %d", dev_info->flags, dev_info->num_regions, dev_info->num_irqs); + + return 0; +} + +/* + * Handles a DMA map/unmap request. + * + * @vfu_ctx: LM context + * @size: size, in bytes, of the memory pointed to be @dma_regions + * @map: whether this is a DMA map operation + * @fds: array of file descriptors. It's length must equal the number of DMA + regions, irrespectively if @nr_fds is 0. + * @nr_fds: size of above array. It must be either 0 or exactly match + * the number of DMA regions in @dma_regions. + * @dma_regions: memory that contains the DMA regions to be mapped/unmapped + * + * @returns 0 on success, -errno on failure. 
+ */ +int +handle_dma_map_or_unmap(vfu_ctx_t *vfu_ctx, uint32_t size, bool map, + int *fds, int nr_fds, + struct vfio_user_dma_region *dma_regions) +{ + int nr_dma_regions; + int ret, i, fdi; + + assert(vfu_ctx != NULL); + assert(fds != NULL); + + if (vfu_ctx->dma == NULL) { + return 0; + } + + if (size % sizeof(struct vfio_user_dma_region) != 0) { + vfu_log(vfu_ctx, VFU_ERR, "bad size of DMA regions %d", size); + return -EINVAL; + } + + nr_dma_regions = (int)(size / sizeof(struct vfio_user_dma_region)); + + for (i = 0, fdi = 0; i < nr_dma_regions; i++) { + if (map) { + int fd = -1; + if (dma_regions[i].flags == VFIO_USER_F_DMA_REGION_MAPPABLE) { + if (fdi == nr_fds) { + return -EINVAL; + } + fd = fds[fdi++]; + } + + ret = dma_controller_add_region(vfu_ctx->dma, + dma_regions[i].addr, + dma_regions[i].size, + fd, + dma_regions[i].offset); + if (ret < 0) { + vfu_log(vfu_ctx, VFU_INF, + "failed to add DMA region %#lx-%#lx offset=%#lx fd=%d: %s", + dma_regions[i].addr, + dma_regions[i].addr + dma_regions[i].size - 1, + dma_regions[i].offset, fd, + strerror(-ret)); + } else { + vfu_log(vfu_ctx, VFU_DBG, + "added DMA region %#lx-%#lx offset=%#lx fd=%d", + dma_regions[i].addr, + dma_regions[i].addr + dma_regions[i].size - 1, + dma_regions[i].offset, fd); + } + } else { + ret = dma_controller_remove_region(vfu_ctx->dma, + dma_regions[i].addr, + dma_regions[i].size, + vfu_ctx->unmap_dma, vfu_ctx->pvt); + if (ret < 0) { + vfu_log(vfu_ctx, VFU_INF, + "failed to remove DMA region %#lx-%#lx: %s", + dma_regions[i].addr, + dma_regions[i].addr + dma_regions[i].size - 1, + strerror(-ret)); + } else { + vfu_log(vfu_ctx, VFU_DBG, + "removed DMA region %#lx-%#lx", + dma_regions[i].addr, + dma_regions[i].addr + dma_regions[i].size - 1); + } + } + if (ret < 0) { + return ret; + } + if (vfu_ctx->map_dma != NULL) { + vfu_ctx->map_dma(vfu_ctx->pvt, dma_regions[i].addr, + dma_regions[i].size); + } + } + return 0; +} + +static int +handle_device_reset(vfu_ctx_t *vfu_ctx) +{ + vfu_log(vfu_ctx, VFU_DBG, "Device reset called by client"); + if (vfu_ctx->reset != NULL) { + return vfu_ctx->reset(vfu_ctx->pvt); + } + return 0; +} + +static int +validate_region_access(vfu_ctx_t *vfu_ctx, uint32_t size, uint16_t cmd, + struct vfio_user_region_access *region_access) +{ + assert(region_access != NULL); + + if (size < sizeof *region_access) { + vfu_log(vfu_ctx, VFU_ERR, "message size too small (%d)", size); + return -EINVAL; + } + + if (region_access->region > vfu_ctx->nr_regions || region_access->count <= 0) { + vfu_log(vfu_ctx, VFU_ERR, "bad region %d and/or count %d", + region_access->region, region_access->count); + return -EINVAL; + } + + if (device_is_stopped_and_copying(vfu_ctx->migration) && + !is_migr_reg(vfu_ctx, region_access->region)) { + vfu_log(vfu_ctx, VFU_ERR, + "cannot access region %d while device in stop-and-copy state", + region_access->region); + return -EINVAL; + } + + if (cmd == VFIO_USER_REGION_WRITE && + size - sizeof *region_access != region_access->count) + { + vfu_log(vfu_ctx, VFU_ERR, "bad region access, expected %lu, actual %d", + size - sizeof *region_access, region_access->count); + return -EINVAL; + } + + return 0; +} + +static int +handle_region_access(vfu_ctx_t *vfu_ctx, uint32_t size, uint16_t cmd, + void **data, size_t *len, + struct vfio_user_region_access *region_access) +{ + uint64_t count, offset; + int ret; + char *buf; + + assert(vfu_ctx != NULL); + assert(data != NULL); + assert(region_access != NULL); + + ret = validate_region_access(vfu_ctx, size, cmd, region_access); + if (ret < 
0) { + return ret; + } + + *len = sizeof *region_access; + if (cmd == VFIO_USER_REGION_READ) { + *len += region_access->count; + } + *data = malloc(*len); + if (*data == NULL) { + return -ENOMEM; + } + if (cmd == VFIO_USER_REGION_READ) { + buf = (char*)(((struct vfio_user_region_access*)(*data)) + 1); + } else { + buf = (char*)(region_access + 1); + } + + count = region_access->count; + offset = region_to_offset(region_access->region) + region_access->offset; + + ret = vfu_access(vfu_ctx, cmd == VFIO_USER_REGION_WRITE, buf, count, &offset); + if (ret != (int)region_access->count) { + vfu_log(vfu_ctx, VFU_ERR, "failed to %s %#x-%#lx: %d", + cmd == VFIO_USER_REGION_WRITE ? "write" : "read", + region_access->count, + region_access->offset + region_access->count - 1, ret); + /* FIXME we should return whatever has been accessed, not an error */ + if (ret >= 0) { + ret = -EINVAL; + } + return ret; + } + + region_access = *data; + region_access->count = ret; + + return 0; +} + +static int +handle_dirty_pages_get(vfu_ctx_t *vfu_ctx, + struct iovec **iovecs, size_t *nr_iovecs, + struct vfio_iommu_type1_dirty_bitmap_get *ranges, + uint32_t size) +{ + int ret = -EINVAL; + size_t i; + + assert(vfu_ctx != NULL); + assert(iovecs != NULL); + assert(nr_iovecs != NULL); + assert(ranges != NULL); + + if (size % sizeof(struct vfio_iommu_type1_dirty_bitmap_get) != 0) { + return -EINVAL; + } + *nr_iovecs = 1 + size / sizeof(struct vfio_iommu_type1_dirty_bitmap_get); + *iovecs = malloc(*nr_iovecs * sizeof(struct iovec)); + if (*iovecs == NULL) { + return -ENOMEM; + } + + for (i = 1; i < *nr_iovecs; i++) { + struct vfio_iommu_type1_dirty_bitmap_get *r = &ranges[(i - 1)]; /* FIXME ugly indexing */ + ret = dma_controller_dirty_page_get(vfu_ctx->dma, r->iova, r->size, + r->bitmap.pgsize, r->bitmap.size, + (char**)&((*iovecs)[i].iov_base)); + if (ret != 0) { + goto out; + } + (*iovecs)[i].iov_len = r->bitmap.size; + } +out: + if (ret != 0) { + if (*iovecs != NULL) { + free(*iovecs); + *iovecs = NULL; + } + } + return ret; +} + +static int +handle_dirty_pages(vfu_ctx_t *vfu_ctx, uint32_t size, + struct iovec **iovecs, size_t *nr_iovecs, + struct vfio_iommu_type1_dirty_bitmap *dirty_bitmap) +{ + int ret; + + assert(vfu_ctx != NULL); + assert(iovecs != NULL); + assert(nr_iovecs != NULL); + assert(dirty_bitmap != NULL); + + if (size < sizeof *dirty_bitmap || size != dirty_bitmap->argsz) { + vfu_log(vfu_ctx, VFU_ERR, "invalid header size %u", size); + return -EINVAL; + } + + if (dirty_bitmap->flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_START) { + ret = dma_controller_dirty_page_logging_start(vfu_ctx->dma, + migration_get_pgsize(vfu_ctx->migration)); + } else if (dirty_bitmap->flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP) { + ret = dma_controller_dirty_page_logging_stop(vfu_ctx->dma); + } else if (dirty_bitmap->flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP) { + ret = handle_dirty_pages_get(vfu_ctx, iovecs, nr_iovecs, + (struct vfio_iommu_type1_dirty_bitmap_get*)(dirty_bitmap + 1), + size - sizeof *dirty_bitmap); + } else { + vfu_log(vfu_ctx, VFU_ERR, "bad flags %#x", dirty_bitmap->flags); + ret = -EINVAL; + } + + return ret; +} + +/* + * FIXME return value is messed up, sometimes we return -1 and set errno while + * other times we return -errno. Fix. + */ + +/* + * Returns 0 if the header is valid, -errno otherwise. 
+ */ +static int +validate_header(vfu_ctx_t *vfu_ctx, struct vfio_user_header *hdr, size_t size) +{ + assert(hdr != NULL); + + if (size < sizeof hdr) { + vfu_log(vfu_ctx, VFU_ERR, "short header read %ld", size); + return -EINVAL; + } + + if (hdr->flags.type != VFIO_USER_F_TYPE_COMMAND) { + vfu_log(vfu_ctx, VFU_ERR, "header not a request"); + return -EINVAL; + } + + if (hdr->msg_size < sizeof hdr) { + vfu_log(vfu_ctx, VFU_ERR, "bad size in header %d", hdr->msg_size); + return -EINVAL; + } + + return 0; +} + +/* + * Populates @hdr to contain the header for the next command to be processed. + * Stores any passed FDs into @fds and the number in @nr_fds. + * + * Returns 0 if there is no command to process, -errno if an error occured, or + * the number of bytes read. + */ +static int +get_next_command(vfu_ctx_t *vfu_ctx, struct vfio_user_header *hdr, int *fds, + int *nr_fds) +{ + int ret; + + /* FIXME get request shouldn't set errno, it should return it as -errno */ + ret = vfu_ctx->trans->get_request(vfu_ctx, hdr, fds, nr_fds); + if (unlikely(ret < 0)) { + if (ret == -EAGAIN || ret == -EWOULDBLOCK) { + return 0; + } + if (ret != -EINTR) { + vfu_log(vfu_ctx, VFU_ERR, "failed to receive request: %s", + strerror(-ret)); + } + return ret; + } + if (unlikely(ret == 0)) { + if (errno == EINTR) { + return -EINTR; + } + if (errno == 0) { + vfu_log(vfu_ctx, VFU_INF, "vfio-user client closed connection"); + } else { + vfu_log(vfu_ctx, VFU_ERR, "end of file: %m"); + } + return -ENOTCONN; + } + return ret; +} + +static int +process_request(vfu_ctx_t *vfu_ctx) +{ + struct vfio_user_header hdr = { 0, }; + int ret; + int *fds = NULL; + int nr_fds; + struct vfio_irq_info irq_info; + struct vfio_device_info dev_info; + struct vfio_region_info *dev_reg_info = NULL; + struct iovec _iovecs[2] = { { 0, } }; + struct iovec *iovecs = NULL; + size_t nr_iovecs = 0; + bool free_iovec_data = true; + void *cmd_data = NULL; + + assert(vfu_ctx != NULL); + + if (device_is_stopped(vfu_ctx->migration)) { + return -ESHUTDOWN; + } + + /* + * FIXME if migration device state is VFIO_DEVICE_STATE_STOP then only + * migration-related operations should execute. However, some operations + * are harmless (e.g. get region info). At the minimum we should fail + * accesses to device regions other than the migration region. I'd expect + * DMA unmap and get dirty pages to be required even in the stop-and-copy + * state. + */ + + nr_fds = vfu_ctx->client_max_fds; + fds = alloca(nr_fds * sizeof(int)); + + ret = get_next_command(vfu_ctx, &hdr, fds, &nr_fds); + if (ret <= 0) { + return ret; + } + + ret = validate_header(vfu_ctx, &hdr, ret); + if (ret < 0) { + return ret; + } + + /* + * TODO from now on if an error occurs we still need to reply. Move this + * code into a separate function so that we don't have to use goto. 
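+ *
+ * The invariant to preserve is, roughly (sketch):
+ *
+ *   ret = handle_xxx(vfu_ctx, ...);        // any command handler below
+ *   ...
+ *   vfu_send_iovec(vfu_ctx->conn_fd, hdr.msg_id, true, 0,
+ *                  iovecs, nr_iovecs, NULL, 0, -ret);
+ *
+ * i.e. a reply is sent whether the handler succeeded or failed, with the
+ * handler's -errno folded into the reply.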
+ */ + + hdr.msg_size -= sizeof(hdr); + if (hdr.msg_size > 0) { + cmd_data = malloc(hdr.msg_size); + if (cmd_data == NULL) { + ret = -ENOMEM; + goto reply; + } + // FIXME: should be transport op + ret = recv(vfu_ctx->conn_fd, cmd_data, hdr.msg_size, 0); + if (ret < 0) { + ret = -errno; + goto reply; + } + if (ret != (int)hdr.msg_size) { + vfu_log(vfu_ctx, VFU_ERR, "short read, expected=%d, actual=%d", + hdr.msg_size, ret); + ret = -EINVAL; + goto reply; + } + } + + if (device_is_stopped_and_copying(vfu_ctx->migration) + && !(hdr.cmd == VFIO_USER_REGION_READ || hdr.cmd == VFIO_USER_REGION_WRITE)) { + vfu_log(vfu_ctx, VFU_ERR, + "bad command %d while device in stop-and-copy state", hdr.cmd); + ret = -EINVAL; + goto reply; + } + + switch (hdr.cmd) { + case VFIO_USER_DMA_MAP: + case VFIO_USER_DMA_UNMAP: + ret = handle_dma_map_or_unmap(vfu_ctx, hdr.msg_size, + hdr.cmd == VFIO_USER_DMA_MAP, + fds, nr_fds, cmd_data); + break; + case VFIO_USER_DEVICE_GET_INFO: + ret = handle_device_get_info(vfu_ctx, hdr.msg_size, &dev_info); + if (ret >= 0) { + _iovecs[1].iov_base = &dev_info; + _iovecs[1].iov_len = dev_info.argsz; + iovecs = _iovecs; + nr_iovecs = 2; + } + break; + case VFIO_USER_DEVICE_GET_REGION_INFO: + ret = handle_device_get_region_info(vfu_ctx, hdr.msg_size, cmd_data, + &dev_reg_info); + if (ret == 0) { + _iovecs[1].iov_base = dev_reg_info; + _iovecs[1].iov_len = dev_reg_info->argsz; + iovecs = _iovecs; + nr_iovecs = 2; + } + break; + case VFIO_USER_DEVICE_GET_IRQ_INFO: + ret = handle_device_get_irq_info(vfu_ctx, hdr.msg_size, cmd_data, + &irq_info); + if (ret == 0) { + _iovecs[1].iov_base = &irq_info; + _iovecs[1].iov_len = sizeof irq_info; + iovecs = _iovecs; + nr_iovecs = 2; + } + break; + case VFIO_USER_DEVICE_SET_IRQS: + ret = handle_device_set_irqs(vfu_ctx, hdr.msg_size, fds, nr_fds, + cmd_data); + break; + case VFIO_USER_REGION_READ: + case VFIO_USER_REGION_WRITE: + iovecs = _iovecs; + ret = handle_region_access(vfu_ctx, hdr.msg_size, hdr.cmd, + &iovecs[1].iov_base, &iovecs[1].iov_len, + cmd_data); + nr_iovecs = 2; + break; + case VFIO_USER_DEVICE_RESET: + ret = handle_device_reset(vfu_ctx); + break; + case VFIO_USER_DIRTY_PAGES: + // FIXME: don't allow migration calls if migration == NULL + ret = handle_dirty_pages(vfu_ctx, hdr.msg_size, &iovecs, &nr_iovecs, + cmd_data); + if (ret >= 0) { + free_iovec_data = false; + } + break; + default: + vfu_log(vfu_ctx, VFU_ERR, "bad command %d", hdr.cmd); + ret = -EINVAL; + goto reply; + } + +reply: + /* + * TODO: In case of error during command handling set errno respectively + * in the reply message. + */ + if (ret < 0) { + vfu_log(vfu_ctx, VFU_ERR, "failed to handle command %d: %s", hdr.cmd, + strerror(-ret)); + } else { + ret = 0; + } + + // FIXME: SPEC: should the reply include the command? I'd say yes? + ret = vfu_send_iovec(vfu_ctx->conn_fd, hdr.msg_id, true, + 0, iovecs, nr_iovecs, NULL, 0, -ret); + if (unlikely(ret < 0)) { + vfu_log(vfu_ctx, VFU_ERR, "failed to complete command: %s", + strerror(-ret)); + } + if (iovecs != NULL && iovecs != _iovecs) { + if (free_iovec_data) { + size_t i; + for (i = 0; i < nr_iovecs; i++) { + free(iovecs[i].iov_base); + } + } + free(iovecs); + } + free(cmd_data); + + return ret; +} + +static int prepare_ctx(vfu_ctx_t *vfu_ctx) +{ + vfu_reg_info_t *cfg_reg; + const vfu_reg_info_t zero_reg = { 0 }; + int err; + uint32_t max_ivs = 0, i; + size_t size; + + if (vfu_ctx->ready != 0) { + return 0; + } + + /* + * With LIBVFIO_USER_FLAG_ATTACH_NB caller is always expected to call + * vfu_ctx_try_attach(). 
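+ *
+ * A non-blocking server would then be driven along these lines (sketch,
+ * error handling elided):
+ *
+ *   vfu_ctx_t *ctx = vfu_create_ctx(VFU_TRANS_SOCK, path,
+ *                                   LIBVFIO_USER_FLAG_ATTACH_NB, pvt);
+ *   ...
+ *   while (vfu_ctx_try_attach(ctx) < 0) {
+ *       ...                                // poll, or do other work
+ *   }
+ *   ...
+ *   vfu_ctx_poll(ctx);                     // once per pending request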
+ */ + if ((vfu_ctx->flags & LIBVFIO_USER_FLAG_ATTACH_NB) == 0) { + vfu_ctx->conn_fd = vfu_ctx->trans->attach(vfu_ctx); + if (vfu_ctx->conn_fd < 0) { + err = vfu_ctx->conn_fd; + if (err != EINTR) { + vfu_log(vfu_ctx, VFU_ERR, "failed to attach: %s", + strerror(-err)); + } + return err; + } + } + + cfg_reg = &vfu_ctx->reg_info[VFU_PCI_DEV_CFG_REGION_IDX]; + + // Set a default config region if none provided. + /* TODO should it be enough to check that the size of region is 0? */ + if (memcmp(cfg_reg, &zero_reg, sizeof(*cfg_reg)) == 0) { + cfg_reg->flags = VFU_REG_FLAG_RW; + cfg_reg->size = PCI_CFG_SPACE_SIZE; + } + + // This maybe allocated by vfu_setup_pci_config_hdr(). + if (vfu_ctx->pci_config_space == NULL) { + vfu_ctx->pci_config_space = calloc(1, cfg_reg->size); + if (vfu_ctx->pci_config_space == NULL) { + return -ENOMEM; + } + } + + // Set type for region registers. + for (i = 0; i < PCI_BARS_NR; i++) { + if (!(vfu_ctx->reg_info[i].flags & VFU_REG_FLAG_MEM)) { + vfu_ctx->pci_config_space->hdr.bars[i].io.region_type |= 0x1; + } + } + + if (vfu_ctx->irqs == NULL) { + /* + * FIXME need to check that the number of MSI and MSI-X IRQs are valid + * (1, 2, 4, 8, 16 or 32 for MSI and up to 2048 for MSI-X). + */ + + // Work out highest count of irq vectors. + for (i = 0; i < VFU_DEV_NUM_IRQS; i++) { + if (max_ivs < vfu_ctx->irq_count[i]) { + max_ivs = vfu_ctx->irq_count[i]; + } + } + + //FIXME: assert(max_ivs > 0)? + size = sizeof(int) * max_ivs; + vfu_ctx->irqs = calloc(1, sizeof(vfu_irqs_t) + size); + if (vfu_ctx->irqs == NULL) { + // vfu_ctx->pci_config_space should be free'ed by vfu_destroy_ctx(). + return -ENOMEM; + } + + // Set context irq information. + for (i = 0; i < max_ivs; i++) { + vfu_ctx->irqs->efds[i] = -1; + } + vfu_ctx->irqs->err_efd = -1; + vfu_ctx->irqs->req_efd = -1; + vfu_ctx->irqs->type = IRQ_NONE; + vfu_ctx->irqs->max_ivs = max_ivs; + + // Reflect on the config space whether INTX is available. + if (vfu_ctx->irq_count[VFU_DEV_INTX_IRQ] != 0) { + vfu_ctx->pci_config_space->hdr.intr.ipin = 1; // INTA# + } + } + + if (vfu_ctx->caps != NULL) { + vfu_ctx->pci_config_space->hdr.sts.cl = 0x1; + vfu_ctx->pci_config_space->hdr.cap = PCI_STD_HEADER_SIZEOF; + } + vfu_ctx->ready = 1; + + return 0; +} + +int +vfu_ctx_drive(vfu_ctx_t *vfu_ctx) +{ + int err; + + if (vfu_ctx == NULL) { + return ERROR(EINVAL); + } + + err = prepare_ctx(vfu_ctx); + if (err < 0) { + return ERROR(-err); + } + + do { + err = process_request(vfu_ctx); + } while (err >= 0); + + return err; +} + +int +vfu_ctx_poll(vfu_ctx_t *vfu_ctx) +{ + int err; + + if (unlikely((vfu_ctx->flags & LIBVFIO_USER_FLAG_ATTACH_NB) == 0)) { + return -ENOTSUP; + } + + assert(vfu_ctx->ready == 1); + err = process_request(vfu_ctx); + + return err >= 0 ? 0 : err; +} + +/* FIXME this is not enough anymore ? 
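 *
 * (offset must be a page-aligned position within a device region, e.g.
 * region_to_offset(VFU_PCI_DEV_BAR0_REGION_IDX) plus a page-aligned
 * offset into that BAR, and length must be non-zero)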
*/ +void * +vfu_mmap(vfu_ctx_t *vfu_ctx, off_t offset, size_t length) +{ + if ((vfu_ctx == NULL) || (length == 0) || !PAGE_ALIGNED(offset)) { + if (vfu_ctx != NULL) { + vfu_log(vfu_ctx, VFU_DBG, "bad device mmap region %#lx-%#lx\n", + offset, offset + length); + } + errno = EINVAL; + return MAP_FAILED; + } + + return mmap(NULL, length, PROT_READ | PROT_WRITE, MAP_SHARED, + vfu_ctx->fd, offset); +} + +static void +free_sparse_mmap_areas(vfu_ctx_t *vfu_ctx) +{ + int i; + + assert(vfu_ctx != NULL); + + for (i = 0; i < (int)vfu_ctx->nr_regions; i++) { + free(vfu_ctx->reg_info[i].mmap_areas); + } +} + +void +vfu_ctx_destroy(vfu_ctx_t *vfu_ctx) +{ + + if (vfu_ctx == NULL) { + return; + } + + free(vfu_ctx->uuid); + free(vfu_ctx->pci_config_space); + if (vfu_ctx->trans->detach != NULL) { + vfu_ctx->trans->detach(vfu_ctx); + } + if (vfu_ctx->dma != NULL) { + dma_controller_destroy(vfu_ctx->dma); + } + free_sparse_mmap_areas(vfu_ctx); + free(vfu_ctx->reg_info); + free(vfu_ctx->caps); + free(vfu_ctx->migration); + free(vfu_ctx->irqs); + free(vfu_ctx); + // FIXME: Maybe close any open irq efds? Unmap stuff? +} + +struct vfu_sparse_mmap_areas* +copy_sparse_mmap_area(struct vfu_sparse_mmap_areas *src) +{ + struct vfu_sparse_mmap_areas *dest; + size_t size; + + assert(src != NULL); + + size = sizeof(*dest) + (src->nr_mmap_areas * sizeof(struct vfu_mmap_area)); + dest = calloc(1, size); + if (dest != NULL) { + memcpy(dest, src, size); + } + return dest; +} + +int +vfu_ctx_try_attach(vfu_ctx_t *vfu_ctx) +{ + int err; + + assert(vfu_ctx != NULL); + + if ((vfu_ctx->flags & LIBVFIO_USER_FLAG_ATTACH_NB) == 0) { + return ERROR(EINVAL); + } + + err = prepare_ctx(vfu_ctx); + if (err < 0) { + return ERROR(-err); + } + + return vfu_ctx->trans->attach(vfu_ctx); +} + +vfu_ctx_t *vfu_create_ctx(vfu_trans_t trans, const char *path, int flags, + void *pvt) +{ + vfu_ctx_t *vfu_ctx = NULL; + int err = 0; + + if (trans != VFU_TRANS_SOCK) { + errno = ENOTSUP; + return NULL; + } + + vfu_ctx = calloc(1, sizeof(vfu_ctx_t)); + if (vfu_ctx == NULL) { + return NULL; + } + vfu_ctx->trans = &sock_transport_ops; + + //FIXME: Validate arguments. + // Set other context data. + vfu_ctx->pvt = pvt; + vfu_ctx->flags = flags; + vfu_ctx->log_lvl = VFU_ERR; + + vfu_ctx->uuid = strdup(path); + if (vfu_ctx->uuid == NULL) { + err = errno; + goto out; + } + + /* + * FIXME: Now we always allocate for migration region. Check if its better + * to seperate migration region from standard regions in vfu_ctx.reg_info + * and move it into vfu_ctx.migration. 
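+ *
+ * As it stands, the layout of vfu_ctx.reg_info is (sketch):
+ *
+ *   reg_info[VFU_PCI_DEV_BAR0_REGION_IDX ... VFU_PCI_DEV_VGA_REGION_IDX]
+ *       standard PCI regions, populated via vfu_setup_region()
+ *   reg_info[nr_regions - 1]
+ *       migration region, populated via vfu_setup_device_migration()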
+ */ + vfu_ctx->nr_regions = VFU_PCI_DEV_NUM_REGIONS + 1; + vfu_ctx->reg_info = calloc(vfu_ctx->nr_regions, sizeof *vfu_ctx->reg_info); + if (vfu_ctx->reg_info == NULL) { + err = -ENOMEM; + goto out; + } + + if (vfu_ctx->trans->init != NULL) { + err = vfu_ctx->trans->init(vfu_ctx); + if (err < 0) { + goto out; + } + vfu_ctx->fd = err; + } + err = 0; + +out: + if (err != 0) { + if (vfu_ctx != NULL) { + vfu_ctx_destroy(vfu_ctx); + vfu_ctx = NULL; + } + errno = -err; + } + + return vfu_ctx; +} + +int vfu_setup_log(vfu_ctx_t *vfu_ctx, vfu_log_fn_t *log, vfu_log_lvl_t log_lvl) +{ + + if (log_lvl != VFU_ERR && log_lvl != VFU_INF && log_lvl != VFU_DBG) { + return ERROR(EINVAL); + } + + vfu_ctx->log = log; + vfu_ctx->log_lvl = log_lvl; + + return 0; +} + +int vfu_pci_setup_config_hdr(vfu_ctx_t *vfu_ctx, vfu_pci_hdr_id_t id, + vfu_pci_hdr_ss_t ss, vfu_pci_hdr_cc_t cc, + UNUSED bool extended) +{ + vfu_pci_config_space_t *config_space; + + assert(vfu_ctx != NULL); + + if (vfu_ctx->pci_config_space != NULL) { + vfu_log(vfu_ctx, VFU_ERR, "pci header already setup"); + return ERROR(EEXIST); + } + + /* TODO: supported extended PCI config space. */ + + // Allocate a buffer for the config space. + config_space = calloc(1, PCI_CFG_SPACE_SIZE); + if (config_space == NULL) { + return ERROR(ENOMEM); + } + + config_space->hdr.id = id; + config_space->hdr.ss = ss; + config_space->hdr.cc = cc; + vfu_ctx->pci_config_space = config_space; + + return 0; +} + +int vfu_pci_setup_caps(vfu_ctx_t *vfu_ctx, vfu_cap_t **caps, int nr_caps) +{ + int ret; + + assert(vfu_ctx != NULL); + + if (vfu_ctx->caps != NULL) { + vfu_log(vfu_ctx, VFU_ERR, "capabilities are already setup"); + return ERROR(EEXIST); + } + + if (caps == NULL || nr_caps == 0) { + vfu_log(vfu_ctx, VFU_ERR, "Invalid args passed"); + return ERROR(EINVAL); + } + + vfu_ctx->caps = caps_create(vfu_ctx, caps, nr_caps, &ret); + if (vfu_ctx->caps == NULL) { + vfu_log(vfu_ctx, VFU_ERR, "failed to create PCI capabilities: %s", + strerror(ret)); + return ERROR(ret); + } + + return 0; +} + +static int +copy_sparse_mmap_areas(vfu_reg_info_t *reg_info, + struct vfu_sparse_mmap_areas *mmap_areas) +{ + int nr_mmap_areas; + size_t size; + + if (mmap_areas == NULL) { + return 0; + } + + nr_mmap_areas = mmap_areas->nr_mmap_areas; + size = sizeof(*mmap_areas) + (nr_mmap_areas * sizeof(struct vfu_mmap_area)); + reg_info->mmap_areas = calloc(1, size); + if (reg_info->mmap_areas == NULL) { + return -ENOMEM; + } + + memcpy(reg_info->mmap_areas, mmap_areas, size); + + return 0; +} + +static inline bool is_valid_pci_config_space_region(int flags, size_t size) +{ + return flags == VFU_REG_FLAG_RW && (size == PCI_CFG_SPACE_SIZE + || size == PCI_CFG_SPACE_EXP_SIZE); +} + +int vfu_setup_region(vfu_ctx_t *vfu_ctx, int region_idx, size_t size, + vfu_region_access_cb_t *region_access, int flags, + struct vfu_sparse_mmap_areas *mmap_areas, + vfu_map_region_cb_t *map) +{ + int ret; + + assert(vfu_ctx != NULL); + + switch(region_idx) { + case VFU_PCI_DEV_BAR0_REGION_IDX ... VFU_PCI_DEV_VGA_REGION_IDX: + // Validate the config region provided. 
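+        // (per is_valid_pci_config_space_region() above: flags must be
+        // exactly VFU_REG_FLAG_RW and size either PCI_CFG_SPACE_SIZE or
+        // PCI_CFG_SPACE_EXP_SIZE)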
+ if (region_idx == VFU_PCI_DEV_CFG_REGION_IDX && + !is_valid_pci_config_space_region(flags, size)) { + return ERROR(EINVAL); + } + + vfu_ctx->reg_info[region_idx].flags = flags; + vfu_ctx->reg_info[region_idx].size = size; + vfu_ctx->reg_info[region_idx].fn = region_access; + + if (map != NULL) { + vfu_ctx->reg_info[region_idx].map = map; + } + if (mmap_areas) { + ret = copy_sparse_mmap_areas(&vfu_ctx->reg_info[region_idx], + mmap_areas); + if (ret < 0) { + return ERROR(-ret); + } + } + break; + default: + vfu_log(vfu_ctx, VFU_ERR, "Invalid region index %d", region_idx); + return ERROR(EINVAL); + } + + return 0; +} + +int vfu_setup_device_reset_cb(vfu_ctx_t *vfu_ctx, vfu_reset_cb_t *reset) +{ + + assert(vfu_ctx != NULL); + vfu_ctx->reset = reset; + + return 0; +} + +int vfu_setup_device_dma_cb(vfu_ctx_t *vfu_ctx, vfu_map_dma_cb_t *map_dma, + vfu_unmap_dma_cb_t *unmap_dma) +{ + + assert(vfu_ctx != NULL); + + vfu_ctx->map_dma = map_dma; + vfu_ctx->unmap_dma = unmap_dma; + + // Create the internal DMA controller. + if (vfu_ctx->unmap_dma != NULL) { + vfu_ctx->dma = dma_controller_create(vfu_ctx, VFU_DMA_REGIONS); + if (vfu_ctx->dma == NULL) { + return ERROR(ENOMEM); + } + } + + return 0; +} + +int vfu_setup_device_nr_irqs(vfu_ctx_t *vfu_ctx, enum vfu_dev_irq_type type, + uint32_t count) +{ + + assert(vfu_ctx != NULL); + + if (type < VFU_DEV_INTX_IRQ || type > VFU_DEV_REQ_IRQ) { + vfu_log(vfu_ctx, VFU_ERR, "Invalid IRQ index %d, should be between " + "(%d to %d)", type, VFU_DEV_INTX_IRQ, + VFU_DEV_REQ_IRQ); + return ERROR(EINVAL); + } + + vfu_ctx->irq_count[type] = count; + + return 0; +} + +int vfu_setup_device_migration(vfu_ctx_t *vfu_ctx, vfu_migration_t *migration) +{ + vfu_reg_info_t *migr_reg; + int ret = 0; + + assert(vfu_ctx != NULL); + + //FIXME: Validate args. + + if (vfu_ctx->migr_reg != NULL) { + vfu_log(vfu_ctx, VFU_ERR, "device migration is already setup"); + return ERROR(EEXIST); + } + + /* FIXME hacky, find a more robust way to allocate a region index */ + migr_reg = &vfu_ctx->reg_info[(vfu_ctx->nr_regions - 1)]; + + /* FIXME: Are there sparse areas need to be setup flags accordingly */ + ret = copy_sparse_mmap_areas(migr_reg, migration->mmap_areas); + if (ret < 0) { + return ERROR(-ret); + } + + migr_reg->flags = VFU_REG_FLAG_RW; + migr_reg->size = sizeof(struct vfio_device_migration_info) + migration->size; + + vfu_ctx->migration = init_migration(migration, &ret); + if (vfu_ctx->migration == NULL) { + vfu_log(vfu_ctx, VFU_ERR, "failed to initialize device migration"); + free(migr_reg->mmap_areas); + return ERROR(ret); + } + vfu_ctx->migr_reg = migr_reg; + + return 0; +} + +/* + * Returns a pointer to the standard part of the PCI configuration space. + */ +inline vfu_pci_config_space_t * +vfu_pci_get_config_space(vfu_ctx_t *vfu_ctx) +{ + assert(vfu_ctx != NULL); + return vfu_ctx->pci_config_space; +} + +/* + * Returns a pointer to the non-standard part of the PCI configuration space. 
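+ *
+ * i.e. the bytes immediately following the standard header, covering the
+ * capability area and, for a PCI_CFG_SPACE_EXP_SIZE config region, the
+ * extended configuration space.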
+ */ +inline uint8_t * +vfu_get_pci_non_std_config_space(vfu_ctx_t *vfu_ctx) +{ + assert(vfu_ctx != NULL); + return (uint8_t *)&vfu_ctx->pci_config_space->non_std; +} + +inline vfu_reg_info_t * +vfu_get_region_info(vfu_ctx_t *vfu_ctx) +{ + assert(vfu_ctx != NULL); + return vfu_ctx->reg_info; +} + +inline int +vfu_addr_to_sg(vfu_ctx_t *vfu_ctx, dma_addr_t dma_addr, + uint32_t len, dma_sg_t *sg, int max_sg, int prot) +{ + assert(vfu_ctx != NULL); + + if (unlikely(vfu_ctx->unmap_dma == NULL)) { + errno = EINVAL; + return -1; + } + return dma_addr_to_sg(vfu_ctx->dma, dma_addr, len, sg, max_sg, prot); +} + +inline int +vfu_map_sg(vfu_ctx_t *vfu_ctx, const dma_sg_t *sg, + struct iovec *iov, int cnt) +{ + if (unlikely(vfu_ctx->unmap_dma == NULL)) { + errno = EINVAL; + return -1; + } + return dma_map_sg(vfu_ctx->dma, sg, iov, cnt); +} + +inline void +vfu_unmap_sg(vfu_ctx_t *vfu_ctx, const dma_sg_t *sg, struct iovec *iov, int cnt) +{ + if (unlikely(vfu_ctx->unmap_dma == NULL)) { + return; + } + return dma_unmap_sg(vfu_ctx->dma, sg, iov, cnt); +} + +uint8_t * +vfu_ctx_get_cap(vfu_ctx_t *vfu_ctx, uint8_t id) +{ + assert(vfu_ctx != NULL); + + return cap_find_by_id(vfu_ctx, id); +} + +int +vfu_dma_read(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, void *data) +{ + struct vfio_user_dma_region_access *dma_recv; + struct vfio_user_dma_region_access dma_send; + int recv_size; + int msg_id = 1, ret; + + assert(vfu_ctx != NULL); + assert(sg != NULL); + + recv_size = sizeof(*dma_recv) + sg->length; + + dma_recv = calloc(recv_size, 1); + if (dma_recv == NULL) { + return -ENOMEM; + } + + dma_send.addr = sg->dma_addr; + dma_send.count = sg->length; + ret = vfu_msg(vfu_ctx->conn_fd, msg_id, VFIO_USER_DMA_READ, + &dma_send, sizeof dma_send, NULL, + dma_recv, recv_size); + memcpy(data, dma_recv->data, sg->length); /* FIXME no need for memcpy */ + free(dma_recv); + + return ret; +} + +int +vfu_dma_write(vfu_ctx_t *vfu_ctx, dma_sg_t *sg, void *data) +{ + struct vfio_user_dma_region_access *dma_send, dma_recv; + int send_size = sizeof(*dma_send) + sg->length; + int msg_id = 1, ret; + + assert(vfu_ctx != NULL); + assert(sg != NULL); + + dma_send = calloc(send_size, 1); + if (dma_send == NULL) { + return -ENOMEM; + } + dma_send->addr = sg->dma_addr; + dma_send->count = sg->length; + memcpy(dma_send->data, data, sg->length); /* FIXME no need to copy! 
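 * The wire format here is the vfio_user_dma_region_access header (addr and
 * count) immediately followed by count payload bytes, hence the staging
 * buffer; a zero-copy path would hand the transport an iovec over the
 * caller's buffer instead.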
*/ + ret = vfu_msg(vfu_ctx->conn_fd, msg_id, VFIO_USER_DMA_WRITE, + dma_send, send_size, NULL, + &dma_recv, sizeof(dma_recv)); + free(dma_send); + + return ret; +} + +/* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lib/migration.c b/lib/migration.c index 3493617..2c99c3a 100644 --- a/lib/migration.c +++ b/lib/migration.c @@ -47,7 +47,7 @@ enum migr_iter_state { struct migration { struct vfio_device_migration_info info; size_t pgsize; - lm_migration_callbacks_t callbacks; + vfu_migration_callbacks_t callbacks; struct { enum migr_iter_state state; __u64 offset; @@ -75,12 +75,13 @@ static const __u32 migr_states[VFIO_DEVICE_STATE_MASK] = { (1 << VFIO_DEVICE_STATE_RESUMING) }; -struct migration *init_migration(const lm_migration_t * const lm_migr, int *err) +struct migration * +init_migration(const vfu_migration_t * const vfu_migr, int *err) { struct migration *migr; *err = 0; - if (lm_migr->size < sizeof(struct vfio_device_migration_info)) { + if (vfu_migr->size < sizeof(struct vfio_device_migration_info)) { *err = EINVAL; return NULL; } @@ -98,10 +99,10 @@ struct migration *init_migration(const lm_migration_t * const lm_migr, int *err) migr->pgsize = sysconf(_SC_PAGESIZE); - /* FIXME this should be done in lm_ctx_run or poll */ + /* FIXME this should be done in vfu_ctx_run or poll */ migr->info.device_state = VFIO_DEVICE_STATE_RUNNING; - migr->callbacks = lm_migr->callbacks; + migr->callbacks = vfu_migr->callbacks; if (migr->callbacks.transition == NULL || migr->callbacks.get_pending_bytes == NULL || migr->callbacks.prepare_data == NULL || @@ -122,9 +123,9 @@ _migr_state_transition_is_valid(__u32 from, __u32 to) } static ssize_t -handle_device_state(lm_ctx_t *lm_ctx, void *pvt, - struct migration *migr, __u32 *device_state, - bool is_write) { +handle_device_state(vfu_ctx_t *vfu_ctx, void *pvt, + struct migration *migr, __u32 *device_state, + bool is_write) { int ret; @@ -137,24 +138,24 @@ handle_device_state(lm_ctx_t *lm_ctx, void *pvt, } if (*device_state & ~VFIO_DEVICE_STATE_MASK) { - lm_log(lm_ctx, LM_ERR, "bad device state %#x", *device_state); + vfu_log(vfu_ctx, VFU_ERR, "bad device state %#x", *device_state); return -EINVAL; } if (!_migr_state_transition_is_valid(migr->info.device_state, *device_state)) { /* TODO print descriptive device state names instead of raw value */ - lm_log(lm_ctx, LM_ERR, "bad transition from state %d to state %d", + vfu_log(vfu_ctx, VFU_ERR, "bad transition from state %d to state %d", migr->info.device_state, *device_state); return -EINVAL; } switch (*device_state) { case VFIO_DEVICE_STATE_STOP: - ret = migr->callbacks.transition(pvt, LM_MIGR_STATE_STOP); + ret = migr->callbacks.transition(pvt, VFU_MIGR_STATE_STOP); break; case VFIO_DEVICE_STATE_RUNNING: - ret = migr->callbacks.transition(pvt, LM_MIGR_STATE_RUNNING); + ret = migr->callbacks.transition(pvt, VFU_MIGR_STATE_RUNNING); break; case VFIO_DEVICE_STATE_SAVING: /* @@ -163,13 +164,13 @@ handle_device_state(lm_ctx_t *lm_ctx, void *pvt, * the migration region? E.g. Access to any other region should be * failed? This might be a good question to send to LKML. 
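 * A device implementation typically fans these states out in its
 * transition callback, along these lines (sketch, with the state
 * parameter type simplified to int for brevity):
 *
 *   static int migr_transition(void *pvt, int state)
 *   {
 *       switch (state) {
 *       case VFU_MIGR_STATE_STOP_AND_COPY:
 *           ...                        // quiesce, snapshot device state
 *           return 0;
 *       case VFU_MIGR_STATE_RESUME:
 *           ...                        // prepare to receive state
 *           return 0;
 *       ...
 *       }
 *   }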
*/ - ret = migr->callbacks.transition(pvt, LM_MIGR_STATE_STOP_AND_COPY); + ret = migr->callbacks.transition(pvt, VFU_MIGR_STATE_STOP_AND_COPY); break; case VFIO_DEVICE_STATE_RUNNING | VFIO_DEVICE_STATE_SAVING: - ret = migr->callbacks.transition(pvt, LM_MIGR_STATE_PRE_COPY); + ret = migr->callbacks.transition(pvt, VFU_MIGR_STATE_PRE_COPY); break; case VFIO_DEVICE_STATE_RESUMING: - ret = migr->callbacks.transition(pvt, LM_MIGR_STATE_RESUME); + ret = migr->callbacks.transition(pvt, VFU_MIGR_STATE_RESUME); break; default: assert(false); @@ -178,13 +179,15 @@ handle_device_state(lm_ctx_t *lm_ctx, void *pvt, if (ret == 0) { migr->info.device_state = *device_state; } else if (ret < 0) { - lm_log(lm_ctx, LM_ERR, "failed to transition to state %d: %s", - *device_state, strerror(-ret)); + vfu_log(vfu_ctx, VFU_ERR, "failed to transition to state %d: %s", + *device_state, strerror(-ret)); } return ret; } +// FIXME: no need to use __u* type variants + static ssize_t handle_pending_bytes(void *pvt, struct migration *migr, __u64 *pending_bytes, bool is_write) @@ -238,7 +241,7 @@ handle_pending_bytes(void *pvt, struct migration *migr, */ static ssize_t -handle_data_offset_when_saving(lm_ctx_t *lm_ctx, void *pvt, +handle_data_offset_when_saving(vfu_ctx_t *vfu_ctx, void *pvt, struct migration *migr, bool is_write) { int ret = 0; @@ -246,7 +249,7 @@ handle_data_offset_when_saving(lm_ctx_t *lm_ctx, void *pvt, assert(migr != NULL); if (is_write) { - lm_log(lm_ctx, LM_ERR, "data_offset is RO when saving"); + vfu_log(vfu_ctx, VFU_ERR, "data_offset is RO when saving"); return -EINVAL; } @@ -264,7 +267,8 @@ handle_data_offset_when_saving(lm_ctx_t *lm_ctx, void *pvt, */ break; default: - lm_log(lm_ctx, LM_ERR, "reading data_offset out of sequence is undefined"); + vfu_log(vfu_ctx, VFU_ERR, + "reading data_offset out of sequence is undefined"); return -EINVAL; } @@ -272,7 +276,7 @@ handle_data_offset_when_saving(lm_ctx_t *lm_ctx, void *pvt, } static ssize_t -handle_data_offset(lm_ctx_t *lm_ctx, void *pvt, struct migration *migr, +handle_data_offset(vfu_ctx_t *vfu_ctx, void *pvt, struct migration *migr, __u64 *offset, bool is_write) { int ret; @@ -283,11 +287,11 @@ handle_data_offset(lm_ctx_t *lm_ctx, void *pvt, struct migration *migr, switch (migr->info.device_state) { case VFIO_DEVICE_STATE_SAVING: case VFIO_DEVICE_STATE_RUNNING | VFIO_DEVICE_STATE_SAVING: - ret = handle_data_offset_when_saving(lm_ctx, pvt, migr, is_write); + ret = handle_data_offset_when_saving(vfu_ctx, pvt, migr, is_write); break; case VFIO_DEVICE_STATE_RESUMING: if (is_write) { - lm_log(lm_ctx, LM_ERR, "bad write to migration data_offset"); + vfu_log(vfu_ctx, VFU_ERR, "bad write to migration data_offset"); ret = -EINVAL; } else { ret = 0; @@ -295,7 +299,8 @@ handle_data_offset(lm_ctx_t *lm_ctx, void *pvt, struct migration *migr, break; default: /* TODO improve error message */ - lm_log(lm_ctx, LM_ERR, "bad access to migration data_offset in state %d", + vfu_log(vfu_ctx, VFU_ERR, + "bad access to migration data_offset in state %d", migr->info.device_state); ret = -EINVAL; } @@ -308,20 +313,21 @@ handle_data_offset(lm_ctx_t *lm_ctx, void *pvt, struct migration *migr, } static ssize_t -handle_data_size_when_saving(lm_ctx_t *lm_ctx, struct migration *migr, +handle_data_size_when_saving(vfu_ctx_t *vfu_ctx, struct migration *migr, bool is_write) { assert(migr != NULL); if (is_write) { /* TODO improve error message */ - lm_log(lm_ctx, LM_ERR, "data_size is RO when saving"); + vfu_log(vfu_ctx, VFU_ERR, "data_size is RO when saving"); return -EINVAL; } 
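    /*
     * A save iteration is expected to read pending_bytes, then data_offset,
     * then data_size, and only then the data itself; the iter.state check
     * below rejects out-of-sequence reads of data_size.
     */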
if (migr->iter.state != VFIO_USER_MIGR_ITER_STATE_STARTED && migr->iter.state != VFIO_USER_MIGR_ITER_STATE_DATA_PREPARED) { - lm_log(lm_ctx, LM_ERR, "reading data_size ouf of sequence is undefined"); + vfu_log(vfu_ctx, VFU_ERR, + "reading data_size ouf of sequence is undefined"); return -EINVAL; } return 0; @@ -344,25 +350,25 @@ handle_data_size_when_resuming(void *pvt, struct migration *migr, } static ssize_t -handle_data_size(lm_ctx_t *lm_ctx, void *pvt, struct migration *migr, +handle_data_size(vfu_ctx_t *vfu_ctx, void *pvt, struct migration *migr, __u64 *size, bool is_write) { int ret; - assert(lm_ctx != NULL); + assert(vfu_ctx != NULL); assert(size != NULL); switch (migr->info.device_state){ case VFIO_DEVICE_STATE_SAVING: case VFIO_DEVICE_STATE_RUNNING | VFIO_DEVICE_STATE_SAVING: - ret = handle_data_size_when_saving(lm_ctx, migr, is_write); + ret = handle_data_size_when_saving(vfu_ctx, migr, is_write); break; case VFIO_DEVICE_STATE_RESUMING: ret = handle_data_size_when_resuming(pvt, migr, *size, is_write); break; default: /* TODO improve error message */ - lm_log(lm_ctx, LM_ERR, "bad access to data_size"); + vfu_log(vfu_ctx, VFU_ERR, "bad access to data_size"); ret = -EINVAL; } @@ -374,7 +380,7 @@ handle_data_size(lm_ctx_t *lm_ctx, void *pvt, struct migration *migr, } static ssize_t -handle_region_access_registers(lm_ctx_t *lm_ctx, void *pvt, +handle_region_access_registers(vfu_ctx_t *vfu_ctx, void *pvt, struct migration *migr, char *buf, size_t count, loff_t pos, bool is_write) { @@ -385,34 +391,38 @@ handle_region_access_registers(lm_ctx_t *lm_ctx, void *pvt, switch (pos) { case offsetof(struct vfio_device_migration_info, device_state): if (count != sizeof(migr->info.device_state)) { - lm_log(lm_ctx, LM_ERR, "bad device_state access size %ld", count); + vfu_log(vfu_ctx, VFU_ERR, + "bad device_state access size %ld", count); return -EINVAL; } - ret = handle_device_state(lm_ctx, pvt, migr, (__u32*)buf, is_write); + ret = handle_device_state(vfu_ctx, pvt, migr, (__u32*)buf, is_write); break; case offsetof(struct vfio_device_migration_info, pending_bytes): if (count != sizeof(migr->info.pending_bytes)) { - lm_log(lm_ctx, LM_ERR, "bad pending_bytes access size %ld", count); + vfu_log(vfu_ctx, VFU_ERR, + "bad pending_bytes access size %ld", count); return -EINVAL; } ret = handle_pending_bytes(pvt, migr, (__u64*)buf, is_write); break; case offsetof(struct vfio_device_migration_info, data_offset): if (count != sizeof(migr->info.data_offset)) { - lm_log(lm_ctx, LM_ERR, "bad data_offset access size %ld", count); + vfu_log(vfu_ctx, VFU_ERR, + "bad data_offset access size %ld", count); return -EINVAL; } - ret = handle_data_offset(lm_ctx, pvt, migr, (__u64*)buf, is_write); + ret = handle_data_offset(vfu_ctx, pvt, migr, (__u64*)buf, is_write); break; case offsetof(struct vfio_device_migration_info, data_size): if (count != sizeof(migr->info.data_size)) { - lm_log(lm_ctx, LM_ERR, "bad data_size access size %ld", count); + vfu_log(vfu_ctx, VFU_ERR, + "bad data_size access size %ld", count); return -EINVAL; } - ret = handle_data_size(lm_ctx, pvt, migr, (__u64*)buf, is_write); + ret = handle_data_size(vfu_ctx, pvt, migr, (__u64*)buf, is_write); break; default: - lm_log(lm_ctx, LM_ERR, "bad migration region register offset %#lx", + vfu_log(vfu_ctx, VFU_ERR, "bad migration region register offset %#lx", pos); return -EINVAL; } @@ -420,7 +430,7 @@ handle_region_access_registers(lm_ctx_t *lm_ctx, void *pvt, } ssize_t -handle_migration_region_access(lm_ctx_t *lm_ctx, void *pvt, 
+handle_migration_region_access(vfu_ctx_t *vfu_ctx, void *pvt, struct migration *migr, char *buf, size_t count, loff_t pos, bool is_write) @@ -429,10 +439,10 @@ handle_migration_region_access(lm_ctx_t *lm_ctx, void *pvt, assert(migr != NULL); assert(buf != NULL); - + if (pos + count <= sizeof(struct vfio_device_migration_info)) { - ret = handle_region_access_registers(lm_ctx, pvt, migr, buf, - count, pos, is_write); + ret = handle_region_access_registers(vfu_ctx, pvt, migr, buf, + count, pos, is_write); } else { pos -= sizeof(struct vfio_device_migration_info); if (is_write) { @@ -474,8 +484,9 @@ migration_set_pgsize(struct migration *migr, size_t pgsize) assert(migr != NULL); // FIXME? - if (pgsize != PAGE_SIZE) + if (pgsize != PAGE_SIZE) { return -EINVAL; + } migr->pgsize = pgsize; return 0; diff --git a/lib/migration.h b/lib/migration.h index 079e6eb..05ed0de 100644 --- a/lib/migration.h +++ b/lib/migration.h @@ -31,28 +31,28 @@ // FIXME: license header (and SPDX ?) everywhere -#ifndef LIB_MUSER_MIGRATION_H -#define LIB_MUSER_MIGRATION_H +#ifndef LIB_VFIO_USER_MIGRATION_H +#define LIB_VFIO_USER_MIGRATION_H /* * These are not public routines, but for convenience, they are used by the - * sample/test code as well as privately within libmuser. + * sample/test code as well as privately within libvfio-user. */ #include -#include "muser.h" +#include "libvfio-user.h" -struct migration *init_migration(const lm_migration_t * const lm_migr, - int *err); +struct migration * +init_migration(const vfu_migration_t * const vfu_migr, int *err); ssize_t -handle_migration_region_access(lm_ctx_t *lm_ctx, void *pvt, +handle_migration_region_access(vfu_ctx_t *vfu_ctx, void *pvt, struct migration *migr, char *buf, size_t count, loff_t pos, bool is_write); bool -migration_available(lm_ctx_t *lm_ctx); +migration_available(vfu_ctx_t *vfu_ctx); bool device_is_stopped_and_copying(struct migration *migr); @@ -66,6 +66,6 @@ migration_get_pgsize(struct migration *migr); int migration_set_pgsize(struct migration *migr, size_t pgsize); -#endif /* LIB_MUSER_MIGRATION_H */ +#endif /* LIB_VFIO_USER_MIGRATION_H */ /* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lib/muser_ctx.c b/lib/muser_ctx.c deleted file mode 100644 index d80e952..0000000 --- a/lib/muser_ctx.c +++ /dev/null @@ -1,1643 +0,0 @@ -/* - * Copyright (c) 2019 Nutanix Inc. All rights reserved. - * - * Authors: Thanos Makatos - * Swapnil Ingle - * Felipe Franciosi - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Nutanix nor the names of its contributors may be - * used to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH - * DAMAGE. - * - */ - -#define _GNU_SOURCE -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "cap.h" -#include "dma.h" -#include "muser.h" -#include "muser_priv.h" -#include "tran_sock.h" -#include "migration.h" -#include "irq.h" - - -void -lm_log(lm_ctx_t *lm_ctx, lm_log_lvl_t lvl, const char *fmt, ...) -{ - va_list ap; - char buf[BUFSIZ]; - int _errno = errno; - - assert(lm_ctx != NULL); - - if (lm_ctx->log == NULL || lvl > lm_ctx->log_lvl || fmt == NULL) { - return; - } - - va_start(ap, fmt); - vsnprintf(buf, sizeof buf, fmt, ap); - va_end(ap); - lm_ctx->log(lm_ctx->pvt, lvl, buf); - errno = _errno; -} - -static inline int ERROR(int err) -{ - errno = err; - return -1; -} - -static size_t -get_vfio_caps_size(bool is_migr_reg, struct lm_sparse_mmap_areas *m) -{ - size_t type_size = 0; - size_t sparse_size = 0; - - if (is_migr_reg) { - type_size = sizeof(struct vfio_region_info_cap_type); - } - - if (m != NULL) { - sparse_size = sizeof(struct vfio_region_info_cap_sparse_mmap) - + (m->nr_mmap_areas * sizeof(struct vfio_region_sparse_mmap_area)); - } - - return type_size + sparse_size; -} - -/* - * Populate the sparse mmap capability information to vfio-client. - * Sparse mmap information stays after struct vfio_region_info and cap_offest - * points accordingly. 
- */ -static void -dev_get_caps(lm_ctx_t *lm_ctx, lm_reg_info_t *lm_reg, bool is_migr_reg, - struct vfio_region_info *vfio_reg) -{ - struct vfio_info_cap_header *header; - struct vfio_region_info_cap_type *type = NULL; - struct vfio_region_info_cap_sparse_mmap *sparse = NULL; - struct lm_sparse_mmap_areas *mmap_areas; - - assert(lm_ctx != NULL); - assert(vfio_reg != NULL); - - header = (struct vfio_info_cap_header*)(vfio_reg + 1); - - if (is_migr_reg) { - type = (struct vfio_region_info_cap_type*)header; - type->header.id = VFIO_REGION_INFO_CAP_TYPE; - type->header.version = 1; - type->header.next = 0; - type->type = VFIO_REGION_TYPE_MIGRATION; - type->subtype = VFIO_REGION_SUBTYPE_MIGRATION; - vfio_reg->cap_offset = sizeof(struct vfio_region_info); - } - - if (lm_reg->mmap_areas != NULL) { - int i, nr_mmap_areas = lm_reg->mmap_areas->nr_mmap_areas; - if (type != NULL) { - type->header.next = vfio_reg->cap_offset + sizeof(struct vfio_region_info_cap_type); - sparse = (struct vfio_region_info_cap_sparse_mmap*)(type + 1); - } else { - vfio_reg->cap_offset = sizeof(struct vfio_region_info); - sparse = (struct vfio_region_info_cap_sparse_mmap*)header; - } - sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP; - sparse->header.version = 1; - sparse->header.next = 0; - sparse->nr_areas = nr_mmap_areas; - - mmap_areas = lm_reg->mmap_areas; - for (i = 0; i < nr_mmap_areas; i++) { - sparse->areas[i].offset = mmap_areas->areas[i].start; - sparse->areas[i].size = mmap_areas->areas[i].size; - lm_log(lm_ctx, LM_DBG, "%s: area %d %#llx-%#llx", __func__, - i, sparse->areas[i].offset, - sparse->areas[i].offset + sparse->areas[i].size); - } - } - - /* - * FIXME VFIO_REGION_INFO_FLAG_MMAP is valid if the region is - * memory-mappable in general, not only if it supports sparse mmap. 
- */ - vfio_reg->flags |= VFIO_REGION_INFO_FLAG_MMAP | VFIO_REGION_INFO_FLAG_CAPS; -} - -#define LM_REGION_SHIFT 40 -#define LM_REGION_MASK ((1ULL << LM_REGION_SHIFT) - 1) - -uint64_t -region_to_offset(uint32_t region) -{ - return (uint64_t)region << LM_REGION_SHIFT; -} - -uint32_t -offset_to_region(uint64_t offset) -{ - return (offset >> LM_REGION_SHIFT) & LM_REGION_MASK; -} - -#ifdef LM_VERBOSE_LOGGING -void -dump_buffer(const char *prefix, const char *buf, uint32_t count) -{ - int i; - const size_t bytes_per_line = 0x8; - - if (strcmp(prefix, "")) { - fprintf(stderr, "%s\n", prefix); - } - for (i = 0; i < (int)count; i++) { - if (i % bytes_per_line != 0) { - fprintf(stderr, " "); - } - /* TODO valgrind emits a warning if count is 1 */ - fprintf(stderr,"0x%02x", *(buf + i)); - if ((i + 1) % bytes_per_line == 0) { - fprintf(stderr, "\n"); - } - } - if (i % bytes_per_line != 0) { - fprintf(stderr, "\n"); - } -} -#else -#define dump_buffer(prefix, buf, count) -#endif - -static bool -is_migr_reg(lm_ctx_t *lm_ctx, int index) -{ - return &lm_ctx->reg_info[index] == lm_ctx->migr_reg; -} - -static long -dev_get_reginfo(lm_ctx_t *lm_ctx, uint32_t index, - struct vfio_region_info **vfio_reg) -{ - lm_reg_info_t *lm_reg; - size_t caps_size; - uint32_t argsz; - - assert(lm_ctx != NULL); - assert(vfio_reg != NULL); - - lm_reg = &lm_ctx->reg_info[index]; - - if (index >= lm_ctx->nr_regions) { - lm_log(lm_ctx, LM_DBG, "bad region index %d", index); - return -EINVAL; - } - - caps_size = get_vfio_caps_size(is_migr_reg(lm_ctx, index), lm_reg->mmap_areas); - argsz = caps_size + sizeof(struct vfio_region_info); - *vfio_reg = calloc(1, argsz); - if (!*vfio_reg) { - return -ENOMEM; - } - /* FIXME document in the protocol that vfio_req->argsz is ignored */ - (*vfio_reg)->argsz = argsz; - (*vfio_reg)->flags = lm_reg->flags; - (*vfio_reg)->index = index; - (*vfio_reg)->offset = region_to_offset((*vfio_reg)->index); - (*vfio_reg)->size = lm_reg->size; - - if (caps_size > 0) { - dev_get_caps(lm_ctx, lm_reg, is_migr_reg(lm_ctx, index), *vfio_reg); - } - - lm_log(lm_ctx, LM_DBG, "region_info[%d] offset %#llx flags %#x size %llu " - "argsz %u", - (*vfio_reg)->index, (*vfio_reg)->offset, (*vfio_reg)->flags, - (*vfio_reg)->size, (*vfio_reg)->argsz); - - return 0; -} - -int -lm_get_region(loff_t pos, size_t count, loff_t *off) -{ - int r; - - assert(off != NULL); - - r = offset_to_region(pos); - if ((int)offset_to_region(pos + count) != r) { - return -ENOENT; - } - *off = pos - region_to_offset(r); - - return r; -} - -static uint32_t -region_size(lm_ctx_t *lm_ctx, int region) -{ - assert(region >= LM_PCI_DEV_BAR0_REGION_IDX && region <= LM_PCI_DEV_VGA_REGION_IDX); - return lm_ctx->reg_info[region].size; -} - -static uint32_t -pci_config_space_size(lm_ctx_t *lm_ctx) -{ - return region_size(lm_ctx, LM_PCI_DEV_CFG_REGION_IDX); -} - -static ssize_t -handle_pci_config_space_access(lm_ctx_t *lm_ctx, char *buf, size_t count, - loff_t pos, bool is_write) -{ - int ret; - - count = MIN(pci_config_space_size(lm_ctx), count); - if (is_write) { - ret = cap_maybe_access(lm_ctx, lm_ctx->caps, buf, count, pos); - if (ret < 0) { - lm_log(lm_ctx, LM_ERR, "bad access to capabilities %#lx-%#lx\n", - pos, pos + count); - return ret; - } - } else { - memcpy(buf, lm_ctx->pci_config_space->raw + pos, count); - } - return count; -} - -static ssize_t -do_access(lm_ctx_t *lm_ctx, char *buf, uint8_t count, uint64_t pos, bool is_write) -{ - int idx; - loff_t offset; - - assert(lm_ctx != NULL); - assert(buf != NULL); - assert(count == 1 || count == 2 
|| count == 4 || count == 8); - - idx = lm_get_region(pos, count, &offset); - if (idx < 0) { - lm_log(lm_ctx, LM_ERR, "invalid region %d", idx); - return idx; - } - - if (idx < 0 || idx >= (int)lm_ctx->nr_regions) { - lm_log(lm_ctx, LM_ERR, "bad region %d", idx); - return -EINVAL; - } - - if (idx == LM_PCI_DEV_CFG_REGION_IDX) { - return handle_pci_config_space_access(lm_ctx, buf, count, offset, - is_write); - } - - if (is_migr_reg(lm_ctx, idx)) { - if (offset + count > lm_ctx->reg_info[idx].size) { - lm_log(lm_ctx, LM_ERR, "read %#lx-%#lx past end of migration region (%#x)", - offset, offset + count - 1, - lm_ctx->reg_info[idx].size); - return -EINVAL; - } - return handle_migration_region_access(lm_ctx, lm_ctx->pvt, - lm_ctx->migration, - buf, count, offset, is_write); - } - - /* - * Checking whether a callback exists might sound expensive however this - * code is not performance critical. This works well when we don't expect a - * region to be used, so the user of the library can simply leave the - * callback NULL in lm_create_ctx. - */ - if (lm_ctx->reg_info[idx].fn != NULL) { - return lm_ctx->reg_info[idx].fn(lm_ctx->pvt, buf, count, offset, - is_write); - } - - lm_log(lm_ctx, LM_ERR, "no callback for region %d", idx); - - return -EINVAL; -} - -/* - * Returns the number of bytes processed on success or a negative number on - * error. - * - * TODO function name same lm_access_t, fix - * FIXME we must be able to return values up to uint32_t bit, or negative on - * error. Better to make return value an int and return the number of bytes - * processed via an argument. - */ -ssize_t -lm_access(lm_ctx_t *lm_ctx, char *buf, uint32_t count, uint64_t *ppos, - bool is_write) -{ - uint32_t done = 0; - int ret; - - assert(lm_ctx != NULL); - /* buf and ppos can be NULL if count is 0 */ - - while (count) { - size_t size; - /* - * Limit accesses to qword and enforce alignment. Figure out whether - * the PCI spec requires this - * FIXME while this makes sense for registers, we might be able to relax - * this requirement and make some transfers more efficient. Maybe make - * this a per-region option that can be set by the user? - */ - if (count >= 8 && !(*ppos % 8)) { - size = 8; - } else if (count >= 4 && !(*ppos % 4)) { - size = 4; - } else if (count >= 2 && !(*ppos % 2)) { - size = 2; - } else { - size = 1; - } - ret = do_access(lm_ctx, buf, size, *ppos, is_write); - if (ret <= 0) { - lm_log(lm_ctx, LM_ERR, "failed to %s %#lx-%#lx: %s", - is_write ? "write to" : "read from", *ppos, *ppos + size - 1, - strerror(-ret)); - /* - * TODO if ret < 0 then it might contain a legitimate error code, why replace it with EFAULT? - */ - return -EFAULT; - } - if (ret != (int)size) { - lm_log(lm_ctx, LM_DBG, "bad read %d != %ld", ret, size); - } - count -= size; - done += size; - *ppos += size; - buf += size; - } - return done; -} - -static inline int -muser_access(lm_ctx_t *lm_ctx, bool is_write, char *rwbuf, uint32_t count, - uint64_t *pos) -{ - uint32_t processed = 0, _count; - int ret; - - assert(lm_ctx != NULL); - assert(rwbuf != NULL); - assert(pos != NULL); - - lm_log(lm_ctx, LM_DBG, "%s %#lx-%#lx", is_write ? "W" : "R", *pos, - *pos + count - 1); - -#ifdef LM_VERBOSE_LOGGING - if (is_write) { - dump_buffer("buffer write", rwbuf, count); - } -#endif - - _count = count; - ret = muser_pci_hdr_access(lm_ctx, &_count, pos, is_write, rwbuf); - if (ret != 0) { - /* FIXME shouldn't we fail here? 
*/ - lm_log(lm_ctx, LM_ERR, "failed to access PCI header: %s", - strerror(-ret)); -#ifdef LM_VERBOSE_LOGGING - dump_buffer("buffer write", rwbuf, _count); -#endif - } - - /* - * count is how much has been processed by muser_pci_hdr_access, - * _count is how much there's left to be processed by lm_access - */ - processed = count - _count; - ret = lm_access(lm_ctx, rwbuf + processed, _count, pos, is_write); - if (ret >= 0) { - ret += processed; -#ifdef LM_VERBOSE_LOGGING - if (!is_write && err == ret) { - dump_buffer("buffer read", rwbuf, ret); - } -#endif - } - - return ret; -} - -/* TODO merge with dev_get_reginfo */ -static int -handle_device_get_region_info(lm_ctx_t *lm_ctx, uint32_t size, - struct vfio_region_info *reg_info_in, - struct vfio_region_info **reg_info_out) -{ - if (size != sizeof(*reg_info_in) || size != reg_info_in->argsz) { - return -EINVAL; - } - - return dev_get_reginfo(lm_ctx, reg_info_in->index, reg_info_out); -} - -static int -handle_device_get_info(lm_ctx_t *lm_ctx, uint32_t size, - struct vfio_device_info *dev_info) -{ - assert(lm_ctx != NULL); - assert(dev_info != NULL); - - if (size != sizeof *dev_info) { - return -EINVAL; - } - - dev_info->argsz = sizeof *dev_info; - dev_info->flags = VFIO_DEVICE_FLAGS_PCI | VFIO_DEVICE_FLAGS_RESET; - dev_info->num_regions = lm_ctx->nr_regions; - dev_info->num_irqs = LM_DEV_NUM_IRQS; - - lm_log(lm_ctx, LM_DBG, "sent devinfo flags %#x, num_regions %d, num_irqs" - " %d", dev_info->flags, dev_info->num_regions, dev_info->num_irqs); - - return 0; -} - -/* - * Handles a DMA map/unmap request. - * - * @lm_ctx: LM context - * @size: size, in bytes, of the memory pointed to be @dma_regions - * @map: whether this is a DMA map operation - * @fds: array of file descriptors. It's length must equal the number of DMA - regions, irrespectively if @nr_fds is 0. - * @nr_fds: size of above array. It must be either 0 or exactly match - * the number of DMA regions in @dma_regions. - * @dma_regions: memory that contains the DMA regions to be mapped/unmapped - * - * @returns 0 on success, -errno on failure. 
- */ -int -handle_dma_map_or_unmap(lm_ctx_t *lm_ctx, uint32_t size, bool map, - int *fds, int nr_fds, - struct vfio_user_dma_region *dma_regions) -{ - int nr_dma_regions; - int ret, i, fdi; - - assert(lm_ctx != NULL); - assert(fds != NULL); - - if (lm_ctx->dma == NULL) { - return 0; - } - - if (size % sizeof(struct vfio_user_dma_region) != 0) { - lm_log(lm_ctx, LM_ERR, "bad size of DMA regions %d", size); - return -EINVAL; - } - - nr_dma_regions = (int)(size / sizeof(struct vfio_user_dma_region)); - - for (i = 0, fdi = 0; i < nr_dma_regions; i++) { - if (map) { - int fd = -1; - if (dma_regions[i].flags == VFIO_USER_F_DMA_REGION_MAPPABLE) { - if (fdi == nr_fds) { - return -EINVAL; - } - fd = fds[fdi++]; - } - - ret = dma_controller_add_region(lm_ctx->dma, - dma_regions[i].addr, - dma_regions[i].size, - fd, - dma_regions[i].offset); - if (ret < 0) { - lm_log(lm_ctx, LM_INF, - "failed to add DMA region %#lx-%#lx offset=%#lx fd=%d: %s", - dma_regions[i].addr, - dma_regions[i].addr + dma_regions[i].size - 1, - dma_regions[i].offset, fd, - strerror(-ret)); - } else { - lm_log(lm_ctx, LM_DBG, - "added DMA region %#lx-%#lx offset=%#lx fd=%d", - dma_regions[i].addr, - dma_regions[i].addr + dma_regions[i].size - 1, - dma_regions[i].offset, fd); - } - } else { - ret = dma_controller_remove_region(lm_ctx->dma, - dma_regions[i].addr, - dma_regions[i].size, - lm_ctx->unmap_dma, lm_ctx->pvt); - if (ret < 0) { - lm_log(lm_ctx, LM_INF, - "failed to remove DMA region %#lx-%#lx: %s", - dma_regions[i].addr, - dma_regions[i].addr + dma_regions[i].size - 1, - strerror(-ret)); - } else { - lm_log(lm_ctx, LM_DBG, - "removed DMA region %#lx-%#lx", - dma_regions[i].addr, - dma_regions[i].addr + dma_regions[i].size - 1); - } - } - if (ret < 0) { - return ret; - } - if (lm_ctx->map_dma != NULL) { - lm_ctx->map_dma(lm_ctx->pvt, dma_regions[i].addr, dma_regions[i].size); - } - } - return 0; -} - -static int -handle_device_reset(lm_ctx_t *lm_ctx) -{ - lm_log(lm_ctx, LM_DBG, "Device reset called by client"); - if (lm_ctx->reset != NULL) { - return lm_ctx->reset(lm_ctx->pvt); - } - return 0; -} - -static int -validate_region_access(lm_ctx_t *lm_ctx, uint32_t size, uint16_t cmd, - struct vfio_user_region_access *region_access) -{ - assert(region_access != NULL); - - if (size < sizeof *region_access) { - lm_log(lm_ctx, LM_ERR, "message size too small (%d)", size); - return -EINVAL; - } - - if (region_access->region > lm_ctx->nr_regions || region_access->count <= 0) { - lm_log(lm_ctx, LM_ERR, "bad region %d and/or count %d", - region_access->region, region_access->count); - return -EINVAL; - } - - if (device_is_stopped_and_copying(lm_ctx->migration) && - !is_migr_reg(lm_ctx, region_access->region)) { - lm_log(lm_ctx, LM_ERR, - "cannot access region %d while device in stop-and-copy state", - region_access->region); - return -EINVAL; - } - - if (cmd == VFIO_USER_REGION_WRITE && - size - sizeof *region_access != region_access->count) - { - lm_log(lm_ctx, LM_ERR, "bad region access, expected %lu, actual %d", - size - sizeof *region_access, region_access->count); - return -EINVAL; - } - - return 0; -} - -static int -handle_region_access(lm_ctx_t *lm_ctx, uint32_t size, uint16_t cmd, - void **data, size_t *len, - struct vfio_user_region_access *region_access) -{ - uint64_t count, offset; - int ret; - char *buf; - - assert(lm_ctx != NULL); - assert(data != NULL); - assert(region_access != NULL); - - ret = validate_region_access(lm_ctx, size, cmd, region_access); - if (ret < 0) { - return ret; - } - - *len = sizeof *region_access; - 
if (cmd == VFIO_USER_REGION_READ) { - *len += region_access->count; - } - *data = malloc(*len); - if (*data == NULL) { - return -ENOMEM; - } - if (cmd == VFIO_USER_REGION_READ) { - buf = (char*)(((struct vfio_user_region_access*)(*data)) + 1); - } else { - buf = (char*)(region_access + 1); - } - - count = region_access->count; - offset = region_to_offset(region_access->region) + region_access->offset; - - ret = muser_access(lm_ctx, cmd == VFIO_USER_REGION_WRITE, - buf, count, &offset); - if (ret != (int)region_access->count) { - lm_log(lm_ctx, LM_ERR, "failed to %s %#x-%#lx: %d", - cmd == VFIO_USER_REGION_WRITE ? "write" : "read", - region_access->count, - region_access->offset + region_access->count - 1, ret); - /* FIXME we should return whatever has been accessed, not an error */ - if (ret >= 0) { - ret = -EINVAL; - } - return ret; - } - - region_access = *data; - region_access->count = ret; - - return 0; -} - -static int -handle_dirty_pages_get(lm_ctx_t *lm_ctx, - struct iovec **iovecs, size_t *nr_iovecs, - struct vfio_iommu_type1_dirty_bitmap_get *ranges, - uint32_t size) -{ - int ret = -EINVAL; - size_t i; - - assert(lm_ctx != NULL); - assert(iovecs != NULL); - assert(nr_iovecs != NULL); - assert(ranges != NULL); - - if (size % sizeof(struct vfio_iommu_type1_dirty_bitmap_get) != 0) { - return -EINVAL; - } - *nr_iovecs = 1 + size / sizeof(struct vfio_iommu_type1_dirty_bitmap_get); - *iovecs = malloc(*nr_iovecs * sizeof(struct iovec)); - if (*iovecs == NULL) { - return -ENOMEM; - } - - for (i = 1; i < *nr_iovecs; i++) { - struct vfio_iommu_type1_dirty_bitmap_get *r = &ranges[(i - 1)]; /* FIXME ugly indexing */ - ret = dma_controller_dirty_page_get(lm_ctx->dma, r->iova, r->size, - r->bitmap.pgsize, r->bitmap.size, - (char**)&((*iovecs)[i].iov_base)); - if (ret != 0) { - goto out; - } - (*iovecs)[i].iov_len = r->bitmap.size; - } -out: - if (ret != 0) { - if (*iovecs != NULL) { - free(*iovecs); - *iovecs = NULL; - } - } - return ret; -} - -static int -handle_dirty_pages(lm_ctx_t *lm_ctx, uint32_t size, - struct iovec **iovecs, size_t *nr_iovecs, - struct vfio_iommu_type1_dirty_bitmap *dirty_bitmap) -{ - int ret; - - assert(lm_ctx != NULL); - assert(iovecs != NULL); - assert(nr_iovecs != NULL); - assert(dirty_bitmap != NULL); - - if (size < sizeof *dirty_bitmap || size != dirty_bitmap->argsz) { - lm_log(lm_ctx, LM_ERR, "invalid header size %u", size); - return -EINVAL; - } - - if (dirty_bitmap->flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_START) { - ret = dma_controller_dirty_page_logging_start(lm_ctx->dma, - migration_get_pgsize(lm_ctx->migration)); - } else if (dirty_bitmap->flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP) { - ret = dma_controller_dirty_page_logging_stop(lm_ctx->dma); - } else if (dirty_bitmap->flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP) { - ret = handle_dirty_pages_get(lm_ctx, iovecs, nr_iovecs, - (struct vfio_iommu_type1_dirty_bitmap_get*)(dirty_bitmap + 1), - size - sizeof *dirty_bitmap); - } else { - lm_log(lm_ctx, LM_ERR, "bad flags %#x", dirty_bitmap->flags); - ret = -EINVAL; - } - - return ret; -} - -/* - * FIXME return value is messed up, sometimes we return -1 and set errno while - * other times we return -errno. Fix. - */ - -/* - * Returns 0 if the header is valid, -errno otherwise. 
- */ -static int -validate_header(lm_ctx_t *lm_ctx, struct vfio_user_header *hdr, size_t size) -{ - assert(hdr != NULL); - - if (size < sizeof hdr) { - lm_log(lm_ctx, LM_ERR, "short header read %ld", size); - return -EINVAL; - } - - if (hdr->flags.type != VFIO_USER_F_TYPE_COMMAND) { - lm_log(lm_ctx, LM_ERR, "header not a request"); - return -EINVAL; - } - - if (hdr->msg_size < sizeof hdr) { - lm_log(lm_ctx, LM_ERR, "bad size in header %d", hdr->msg_size); - return -EINVAL; - } - - return 0; -} - -/* - * Populates @hdr to contain the header for the next command to be processed. - * Stores any passed FDs into @fds and the number in @nr_fds. - * - * Returns 0 if there is no command to process, -errno if an error occured, or - * the number of bytes read. - */ -static int -get_next_command(lm_ctx_t *lm_ctx, struct vfio_user_header *hdr, int *fds, - int *nr_fds) -{ - int ret; - - /* FIXME get request shouldn't set errno, it should return it as -errno */ - ret = lm_ctx->trans->get_request(lm_ctx, hdr, fds, nr_fds); - if (unlikely(ret < 0)) { - if (ret == -EAGAIN || ret == -EWOULDBLOCK) { - return 0; - } - if (ret != -EINTR) { - lm_log(lm_ctx, LM_ERR, "failed to receive request: %s", - strerror(-ret)); - } - return ret; - } - if (unlikely(ret == 0)) { - if (errno == EINTR) { - return -EINTR; - } - if (errno == 0) { - lm_log(lm_ctx, LM_INF, "vfio-user client closed connection"); - } else { - lm_log(lm_ctx, LM_ERR, "end of file: %m"); - } - return -ENOTCONN; - } - return ret; -} - -static int -process_request(lm_ctx_t *lm_ctx) -{ - struct vfio_user_header hdr = { 0, }; - int ret; - int *fds = NULL; - int nr_fds; - struct vfio_irq_info irq_info; - struct vfio_device_info dev_info; - struct vfio_region_info *dev_reg_info = NULL; - struct iovec _iovecs[2] = { { 0, } }; - struct iovec *iovecs = NULL; - size_t nr_iovecs = 0; - bool free_iovec_data = true; - void *cmd_data = NULL; - - assert(lm_ctx != NULL); - - if (device_is_stopped(lm_ctx->migration)) { - return -ESHUTDOWN; - } - - /* - * FIXME if migration device state is VFIO_DEVICE_STATE_STOP then only - * migration-related operations should execute. However, some operations - * are harmless (e.g. get region info). At the minimum we should fail - * accesses to device regions other than the migration region. I'd expect - * DMA unmap and get dirty pages to be required even in the stop-and-copy - * state. - */ - - nr_fds = lm_ctx->client_max_fds; - fds = alloca(nr_fds * sizeof(int)); - - ret = get_next_command(lm_ctx, &hdr, fds, &nr_fds); - if (ret <= 0) { - return ret; - } - - ret = validate_header(lm_ctx, &hdr, ret); - if (ret < 0) { - return ret; - } - - /* - * TODO from now on if an error occurs we still need to reply. Move this - * code into a separate function so that we don't have to use goto. 
- */ - - hdr.msg_size -= sizeof(hdr); - if (hdr.msg_size > 0) { - cmd_data = malloc(hdr.msg_size); - if (cmd_data == NULL) { - ret = -ENOMEM; - goto reply; - } - // FIXME: should be transport op - ret = recv(lm_ctx->conn_fd, cmd_data, hdr.msg_size, 0); - if (ret < 0) { - ret = -errno; - goto reply; - } - if (ret != (int)hdr.msg_size) { - lm_log(lm_ctx, LM_ERR, "short read, expected=%d, actual=%d", - hdr.msg_size, ret); - ret = -EINVAL; - goto reply; - } - } - - if (device_is_stopped_and_copying(lm_ctx->migration) - && !(hdr.cmd == VFIO_USER_REGION_READ || hdr.cmd == VFIO_USER_REGION_WRITE)) { - lm_log(lm_ctx, LM_ERR, - "bad command %d while device in stop-and-copy state", hdr.cmd); - ret = -EINVAL; - goto reply; - } - - switch (hdr.cmd) { - case VFIO_USER_DMA_MAP: - case VFIO_USER_DMA_UNMAP: - ret = handle_dma_map_or_unmap(lm_ctx, hdr.msg_size, - hdr.cmd == VFIO_USER_DMA_MAP, - fds, nr_fds, cmd_data); - break; - case VFIO_USER_DEVICE_GET_INFO: - ret = handle_device_get_info(lm_ctx, hdr.msg_size, &dev_info); - if (ret >= 0) { - _iovecs[1].iov_base = &dev_info; - _iovecs[1].iov_len = dev_info.argsz; - iovecs = _iovecs; - nr_iovecs = 2; - } - break; - case VFIO_USER_DEVICE_GET_REGION_INFO: - ret = handle_device_get_region_info(lm_ctx, hdr.msg_size, cmd_data, - &dev_reg_info); - if (ret == 0) { - _iovecs[1].iov_base = dev_reg_info; - _iovecs[1].iov_len = dev_reg_info->argsz; - iovecs = _iovecs; - nr_iovecs = 2; - } - break; - case VFIO_USER_DEVICE_GET_IRQ_INFO: - ret = handle_device_get_irq_info(lm_ctx, hdr.msg_size, cmd_data, - &irq_info); - if (ret == 0) { - _iovecs[1].iov_base = &irq_info; - _iovecs[1].iov_len = sizeof irq_info; - iovecs = _iovecs; - nr_iovecs = 2; - } - break; - case VFIO_USER_DEVICE_SET_IRQS: - ret = handle_device_set_irqs(lm_ctx, hdr.msg_size, fds, nr_fds, - cmd_data); - break; - case VFIO_USER_REGION_READ: - case VFIO_USER_REGION_WRITE: - iovecs = _iovecs; - ret = handle_region_access(lm_ctx, hdr.msg_size, hdr.cmd, - &iovecs[1].iov_base, &iovecs[1].iov_len, - cmd_data); - nr_iovecs = 2; - break; - case VFIO_USER_DEVICE_RESET: - ret = handle_device_reset(lm_ctx); - break; - case VFIO_USER_DIRTY_PAGES: - // FIXME: don't allow migration calls if migration == NULL - ret = handle_dirty_pages(lm_ctx, hdr.msg_size, &iovecs, &nr_iovecs, - cmd_data); - if (ret >= 0) { - free_iovec_data = false; - } - break; - default: - lm_log(lm_ctx, LM_ERR, "bad command %d", hdr.cmd); - ret = -EINVAL; - goto reply; - } - -reply: - /* - * TODO: In case of error during command handling set errno respectively - * in the reply message. - */ - if (ret < 0) { - lm_log(lm_ctx, LM_ERR, "failed to handle command %d: %s", hdr.cmd, - strerror(-ret)); - } else { - ret = 0; - } - - // FIXME: SPEC: should the reply include the command? I'd say yes? - ret = vfio_user_send_iovec(lm_ctx->conn_fd, hdr.msg_id, true, - 0, iovecs, nr_iovecs, NULL, 0, -ret); - if (unlikely(ret < 0)) { - lm_log(lm_ctx, LM_ERR, "failed to complete command: %s", - strerror(-ret)); - } - if (iovecs != NULL && iovecs != _iovecs) { - if (free_iovec_data) { - size_t i; - for (i = 0; i < nr_iovecs; i++) { - free(iovecs[i].iov_base); - } - } - free(iovecs); - } - free(cmd_data); - - return ret; -} - -static int prepare_ctx(lm_ctx_t *lm_ctx) -{ - lm_reg_info_t *cfg_reg; - const lm_reg_info_t zero_reg = { 0 }; - int err; - uint32_t max_ivs = 0, i; - size_t size; - - if (lm_ctx->ready != 0) { - return 0; - } - - // Attach to the muser control device. With LM_FLAG_ATTACH_NB caller is - // always expected to call lm_ctx_try_attach(). 
- if ((lm_ctx->flags & LM_FLAG_ATTACH_NB) == 0) { - lm_ctx->conn_fd = lm_ctx->trans->attach(lm_ctx); - if (lm_ctx->conn_fd < 0) { - err = lm_ctx->conn_fd; - if (err != EINTR) { - lm_log(lm_ctx, LM_ERR, "failed to attach: %s", - strerror(-err)); - } - return err; - } - } - - cfg_reg = &lm_ctx->reg_info[LM_PCI_DEV_CFG_REGION_IDX]; - - // Set a default config region if none provided. - /* TODO should it be enough to check that the size of region is 0? */ - if (memcmp(cfg_reg, &zero_reg, sizeof(*cfg_reg)) == 0) { - cfg_reg->flags = LM_REG_FLAG_RW; - cfg_reg->size = PCI_CFG_SPACE_SIZE; - } - - // This may be allocated by lm_setup_pci_config_hdr(). - if (lm_ctx->pci_config_space == NULL) { - lm_ctx->pci_config_space = calloc(1, cfg_reg->size); - if (lm_ctx->pci_config_space == NULL) { - return -ENOMEM; - } - } - - // Set type for region registers. - for (i = 0; i < PCI_BARS_NR; i++) { - if (!(lm_ctx->reg_info[i].flags & LM_REG_FLAG_MEM)) { - lm_ctx->pci_config_space->hdr.bars[i].io.region_type |= 0x1; - } - } - - if (lm_ctx->irqs == NULL) { - /* - * FIXME need to check that the number of MSI and MSI-X IRQs is valid - * (1, 2, 4, 8, 16 or 32 for MSI and up to 2048 for MSI-X). - */ - - // Work out highest count of irq vectors. - for (i = 0; i < LM_DEV_NUM_IRQS; i++) { - if (max_ivs < lm_ctx->irq_count[i]) { - max_ivs = lm_ctx->irq_count[i]; - } - } - - //FIXME: assert(max_ivs > 0)? - size = sizeof(int) * max_ivs; - lm_ctx->irqs = calloc(1, sizeof(lm_irqs_t) + size); - if (lm_ctx->irqs == NULL) { - // lm_ctx->pci_config_space should be freed by lm_destroy_ctx(). - return -ENOMEM; - } - - // Set context irq information. - for (i = 0; i < max_ivs; i++) { - lm_ctx->irqs->efds[i] = -1; - } - lm_ctx->irqs->err_efd = -1; - lm_ctx->irqs->req_efd = -1; - lm_ctx->irqs->type = IRQ_NONE; - lm_ctx->irqs->max_ivs = max_ivs; - - // Reflect on the config space whether INTX is available. - if (lm_ctx->irq_count[LM_DEV_INTX_IRQ] != 0) { - lm_ctx->pci_config_space->hdr.intr.ipin = 1; // INTA# - } - } - - if (lm_ctx->caps != NULL) { - lm_ctx->pci_config_space->hdr.sts.cl = 0x1; - lm_ctx->pci_config_space->hdr.cap = PCI_STD_HEADER_SIZEOF; - } - lm_ctx->ready = 1; - - return 0; -} - -int -lm_ctx_drive(lm_ctx_t *lm_ctx) -{ - int err; - - if (lm_ctx == NULL) { - return ERROR(EINVAL); - } - - err = prepare_ctx(lm_ctx); - if (err < 0) { - return ERROR(-err); - } - - do { - err = process_request(lm_ctx); - } while (err >= 0); - - return err; -} - -int -lm_ctx_poll(lm_ctx_t *lm_ctx) -{ - int err; - - if (unlikely((lm_ctx->flags & LM_FLAG_ATTACH_NB) == 0)) { - return -ENOTSUP; - } - - assert(lm_ctx->ready == 1); - err = process_request(lm_ctx); - - return err >= 0 ?
0 : err; -} - -/* FIXME this is not enough anymore, check muser_mmap */ -void * -lm_mmap(lm_ctx_t *lm_ctx, off_t offset, size_t length) -{ - if ((lm_ctx == NULL) || (length == 0) || !PAGE_ALIGNED(offset)) { - if (lm_ctx != NULL) { - lm_log(lm_ctx, LM_DBG, "bad device mmap region %#lx-%#lx\n", - offset, offset + length); - } - errno = EINVAL; - return MAP_FAILED; - } - - return mmap(NULL, length, PROT_READ | PROT_WRITE, MAP_SHARED, - lm_ctx->fd, offset); -} - -static void -free_sparse_mmap_areas(lm_ctx_t *lm_ctx) -{ - int i; - - assert(lm_ctx != NULL); - - for (i = 0; i < (int)lm_ctx->nr_regions; i++) - free(lm_ctx->reg_info[i].mmap_areas); -} - -void -lm_ctx_destroy(lm_ctx_t *lm_ctx) -{ - - if (lm_ctx == NULL) { - return; - } - - free(lm_ctx->uuid); - free(lm_ctx->pci_config_space); - if (lm_ctx->trans->detach != NULL) { - lm_ctx->trans->detach(lm_ctx); - } - if (lm_ctx->dma != NULL) { - dma_controller_destroy(lm_ctx->dma); - } - free_sparse_mmap_areas(lm_ctx); - free(lm_ctx->reg_info); - free(lm_ctx->caps); - free(lm_ctx->migration); - free(lm_ctx->irqs); - free(lm_ctx); - // FIXME: Maybe close any open irq efds? Unmap stuff? -} - -struct lm_sparse_mmap_areas* -copy_sparse_mmap_area(struct lm_sparse_mmap_areas *src) -{ - struct lm_sparse_mmap_areas *dest; - size_t size; - - assert(src != NULL); - - size = sizeof(*dest) + (src->nr_mmap_areas * sizeof(struct lm_mmap_area)); - dest = calloc(1, size); - if (dest != NULL) { - memcpy(dest, src, size); - } - return dest; -} - -int -lm_ctx_try_attach(lm_ctx_t *lm_ctx) -{ - int err; - - assert(lm_ctx != NULL); - - if ((lm_ctx->flags & LM_FLAG_ATTACH_NB) == 0) { - return ERROR(EINVAL); - } - - err = prepare_ctx(lm_ctx); - if (err < 0) { - return ERROR(-err); - } - - return lm_ctx->trans->attach(lm_ctx); -} - -lm_ctx_t *lm_create_ctx(lm_trans_t trans, const char *path, int flags, - void *pvt) -{ - lm_ctx_t *lm_ctx = NULL; - int err = 0; - - if (trans != LM_TRANS_SOCK) { - errno = ENOTSUP; - return NULL; - } - - lm_ctx = calloc(1, sizeof(lm_ctx_t)); - if (lm_ctx == NULL) { - return NULL; - } - lm_ctx->trans = &sock_transport_ops; - - //FIXME: Validate arguments. - // Set other context data. - lm_ctx->pvt = pvt; - lm_ctx->flags = flags; - lm_ctx->log_lvl = LM_ERR; - - lm_ctx->uuid = strdup(path); - if (lm_ctx->uuid == NULL) { - err = errno; - goto out; - } - - /* - * FIXME: Now we always allocate for migration region. Check if it's better - * to separate migration region from standard regions in lm_ctx.reg_info - * and move it into lm_ctx.migration.
- */ - lm_ctx->nr_regions = LM_PCI_DEV_NUM_REGIONS + 1; - lm_ctx->reg_info = calloc(lm_ctx->nr_regions, sizeof *lm_ctx->reg_info); - if (lm_ctx->reg_info == NULL) { - err = -ENOMEM; - goto out; - } - - if (lm_ctx->trans->init != NULL) { - err = lm_ctx->trans->init(lm_ctx); - if (err < 0) { - goto out; - } - lm_ctx->fd = err; - } - err = 0; - -out: - if (err != 0) { - if (lm_ctx != NULL) { - lm_ctx_destroy(lm_ctx); - lm_ctx = NULL; - } - errno = -err; - } - - return lm_ctx; -} - -int lm_setup_log(lm_ctx_t *lm_ctx, lm_log_fn_t *log, lm_log_lvl_t log_lvl) -{ - - if (log_lvl != LM_ERR && log_lvl != LM_INF && log_lvl != LM_DBG) { - return ERROR(EINVAL); - } - - lm_ctx->log = log; - lm_ctx->log_lvl = log_lvl; - - return 0; -} - -int lm_pci_setup_config_hdr(lm_ctx_t *lm_ctx, lm_pci_hdr_id_t id, - lm_pci_hdr_ss_t ss, lm_pci_hdr_cc_t cc, - UNUSED bool extended) -{ - lm_pci_config_space_t *config_space; - - assert(lm_ctx != NULL); - - if (lm_ctx->pci_config_space != NULL) { - lm_log(lm_ctx, LM_ERR, "pci header already set up"); - return ERROR(EEXIST); - } - - /* TODO: support extended PCI config space. */ - - // Allocate a buffer for the config space. - config_space = calloc(1, PCI_CFG_SPACE_SIZE); - if (config_space == NULL) { - return ERROR(ENOMEM); - } - - config_space->hdr.id = id; - config_space->hdr.ss = ss; - config_space->hdr.cc = cc; - lm_ctx->pci_config_space = config_space; - - return 0; -} - -int lm_pci_setup_caps(lm_ctx_t *lm_ctx, lm_cap_t **caps, int nr_caps) -{ - int ret; - - assert(lm_ctx != NULL); - - if (lm_ctx->caps != NULL) { - lm_log(lm_ctx, LM_ERR, "capabilities are already set up"); - return ERROR(EEXIST); - } - - if (caps == NULL || nr_caps == 0) { - lm_log(lm_ctx, LM_ERR, "Invalid args passed"); - return ERROR(EINVAL); - } - - lm_ctx->caps = caps_create(lm_ctx, caps, nr_caps, &ret); - if (lm_ctx->caps == NULL) { - lm_log(lm_ctx, LM_ERR, "failed to create PCI capabilities: %s", - strerror(ret)); - return ERROR(ret); - } - - return 0; -} - -static int -copy_sparse_mmap_areas(lm_reg_info_t *reg_info, - struct lm_sparse_mmap_areas *mmap_areas) -{ - int nr_mmap_areas; - size_t size; - - if (mmap_areas == NULL) { - return 0; - } - - nr_mmap_areas = mmap_areas->nr_mmap_areas; - size = sizeof(*mmap_areas) + (nr_mmap_areas * sizeof(struct lm_mmap_area)); - reg_info->mmap_areas = calloc(1, size); - if (reg_info->mmap_areas == NULL) { - return -ENOMEM; - } - - memcpy(reg_info->mmap_areas, mmap_areas, size); - - return 0; -} - -static inline bool is_valid_pci_config_space_region(int flags, size_t size) -{ - return flags == LM_REG_FLAG_RW && (size == PCI_CFG_SPACE_SIZE - || size == PCI_CFG_SPACE_EXP_SIZE); -} - -int lm_setup_region(lm_ctx_t *lm_ctx, int region_idx, size_t size, - lm_region_access_cb_t *region_access, int flags, - struct lm_sparse_mmap_areas *mmap_areas, - lm_map_region_cb_t *map) -{ - int ret; - - assert(lm_ctx != NULL); - - switch(region_idx) { - case LM_PCI_DEV_BAR0_REGION_IDX ... LM_PCI_DEV_VGA_REGION_IDX: - // Validate the config region provided.
- if (region_idx == LM_PCI_DEV_CFG_REGION_IDX && - !is_valid_pci_config_space_region(flags, size)) { - return ERROR(EINVAL); - } - - lm_ctx->reg_info[region_idx].flags = flags; - lm_ctx->reg_info[region_idx].size = size; - lm_ctx->reg_info[region_idx].fn = region_access; - - if (map != NULL) { - lm_ctx->reg_info[region_idx].map = map; - } - if (mmap_areas) { - ret = copy_sparse_mmap_areas(&lm_ctx->reg_info[region_idx], - mmap_areas); - if (ret < 0) { - return ERROR(-ret); - } - } - break; - default: - lm_log(lm_ctx, LM_ERR, "Invalid region index %d", region_idx); - return ERROR(EINVAL); - } - - return 0; -} - -int lm_setup_device_reset_cb(lm_ctx_t *lm_ctx, lm_reset_cb_t *reset) -{ - - assert(lm_ctx != NULL); - lm_ctx->reset = reset; - - return 0; -} - -int lm_setup_device_dma_cb(lm_ctx_t *lm_ctx, lm_map_dma_cb_t *map_dma, - lm_unmap_dma_cb_t *unmap_dma) -{ - - assert(lm_ctx != NULL); - - lm_ctx->map_dma = map_dma; - lm_ctx->unmap_dma = unmap_dma; - - // Create the internal DMA controller. - if (lm_ctx->unmap_dma != NULL) { - lm_ctx->dma = dma_controller_create(lm_ctx, LM_DMA_REGIONS); - if (lm_ctx->dma == NULL) { - return ERROR(ENOMEM); - } - } - - return 0; -} - -int lm_setup_device_nr_irqs(lm_ctx_t *lm_ctx, enum lm_dev_irq_type type, - uint32_t count) -{ - - assert(lm_ctx != NULL); - - if (type < LM_DEV_INTX_IRQ || type > LM_DEV_REQ_IRQ) { - lm_log(lm_ctx, LM_ERR, "Invalid IRQ index %d, should be between " - "%d and %d", type, LM_DEV_INTX_IRQ, - LM_DEV_REQ_IRQ); - return ERROR(EINVAL); - } - - lm_ctx->irq_count[type] = count; - - return 0; -} - -int lm_setup_device_migration(lm_ctx_t *lm_ctx, lm_migration_t *migration) -{ - lm_reg_info_t *migr_reg; - int ret = 0; - - assert(lm_ctx != NULL); - - //FIXME: Validate args. - - if (lm_ctx->migr_reg != NULL) { - lm_log(lm_ctx, LM_ERR, "device migration is already set up"); - return ERROR(EEXIST); - } - - /* FIXME hacky, find a more robust way to allocate a region index */ - migr_reg = &lm_ctx->reg_info[(lm_ctx->nr_regions - 1)]; - - /* FIXME: if there are sparse mmap areas, set up the flags accordingly */ - ret = copy_sparse_mmap_areas(migr_reg, migration->mmap_areas); - if (ret < 0) { - return ERROR(-ret); - } - - migr_reg->flags = LM_REG_FLAG_RW; - migr_reg->size = sizeof(struct vfio_device_migration_info) + migration->size; - - lm_ctx->migration = init_migration(migration, &ret); - if (lm_ctx->migration == NULL) { - lm_log(lm_ctx, LM_ERR, "failed to initialize device migration"); - free(migr_reg->mmap_areas); - return ERROR(ret); - } - lm_ctx->migr_reg = migr_reg; - - return 0; -} - -/* - * Returns a pointer to the standard part of the PCI configuration space. - */ -inline lm_pci_config_space_t * -lm_get_pci_config_space(lm_ctx_t *lm_ctx) -{ - assert(lm_ctx != NULL); - return lm_ctx->pci_config_space; -} - -/* - * Returns a pointer to the non-standard part of the PCI configuration space.
- */ -inline uint8_t * -lm_get_pci_non_std_config_space(lm_ctx_t *lm_ctx) -{ - assert(lm_ctx != NULL); - return (uint8_t *)&lm_ctx->pci_config_space->non_std; -} - -inline lm_reg_info_t * -lm_get_region_info(lm_ctx_t *lm_ctx) -{ - assert(lm_ctx != NULL); - return lm_ctx->reg_info; -} - -inline int -lm_addr_to_sg(lm_ctx_t *lm_ctx, dma_addr_t dma_addr, - uint32_t len, dma_sg_t *sg, int max_sg, int prot) -{ - assert(lm_ctx != NULL); - - if (unlikely(lm_ctx->unmap_dma == NULL)) { - errno = EINVAL; - return -1; - } - return dma_addr_to_sg(lm_ctx->dma, dma_addr, len, sg, max_sg, prot); -} - -inline int -lm_map_sg(lm_ctx_t *lm_ctx, const dma_sg_t *sg, - struct iovec *iov, int cnt) -{ - if (unlikely(lm_ctx->unmap_dma == NULL)) { - errno = EINVAL; - return -1; - } - return dma_map_sg(lm_ctx->dma, sg, iov, cnt); -} - -inline void -lm_unmap_sg(lm_ctx_t *lm_ctx, const dma_sg_t *sg, struct iovec *iov, int cnt) -{ - if (unlikely(lm_ctx->unmap_dma == NULL)) { - return; - } - return dma_unmap_sg(lm_ctx->dma, sg, iov, cnt); -} - -uint8_t * -lm_ctx_get_cap(lm_ctx_t *lm_ctx, uint8_t id) -{ - assert(lm_ctx != NULL); - - return cap_find_by_id(lm_ctx, id); -} - -int -lm_dma_read(lm_ctx_t *lm_ctx, dma_sg_t *sg, void *data) -{ - struct vfio_user_dma_region_access *dma_recv; - struct vfio_user_dma_region_access dma_send; - int recv_size; - int msg_id = 1, ret; - - assert(lm_ctx != NULL); - assert(sg != NULL); - - recv_size = sizeof(*dma_recv) + sg->length; - - dma_recv = calloc(recv_size, 1); - if (dma_recv == NULL) { - return -ENOMEM; - } - - dma_send.addr = sg->dma_addr; - dma_send.count = sg->length; - ret = vfio_user_msg(lm_ctx->conn_fd, msg_id, VFIO_USER_DMA_READ, - &dma_send, sizeof dma_send, NULL, - dma_recv, recv_size); - memcpy(data, dma_recv->data, sg->length); /* FIXME no need for memcpy */ - free(dma_recv); - - return ret; -} - -int -lm_dma_write(lm_ctx_t *lm_ctx, dma_sg_t *sg, void *data) -{ - struct vfio_user_dma_region_access *dma_send, dma_recv; - int send_size = sizeof(*dma_send) + sg->length; - int msg_id = 1, ret; - - assert(lm_ctx != NULL); - assert(sg != NULL); - - dma_send = calloc(send_size, 1); - if (dma_send == NULL) { - return -ENOMEM; - } - dma_send->addr = sg->dma_addr; - dma_send->count = sg->length; - memcpy(dma_send->data, data, sg->length); /* FIXME no need to copy! */ - ret = vfio_user_msg(lm_ctx->conn_fd, msg_id, VFIO_USER_DMA_WRITE, - dma_send, send_size, NULL, - &dma_recv, sizeof(dma_recv)); - free(dma_send); - - return ret; -} - -/* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lib/muser_pci.c b/lib/muser_pci.c deleted file mode 100644 index 9535f21..0000000 --- a/lib/muser_pci.c +++ /dev/null @@ -1,333 +0,0 @@ -/* - * Copyright (c) 2019 Nutanix Inc. All rights reserved. - * - * Authors: Thanos Makatos - * Swapnil Ingle - * Felipe Franciosi - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Nutanix nor the names of its contributors may be - * used to endorse or promote products derived from this software without - * specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH - * DAMAGE. - * - */ - -#include -#include -#include -#include -#include - -#include -#include - -#include "muser.h" -#include "muser_priv.h" -#include "pci.h" -#include "common.h" - -static inline void -muser_pci_hdr_write_bar(lm_ctx_t *lm_ctx, uint16_t bar_index, const char *buf) -{ - uint32_t cfg_addr; - unsigned long mask; - lm_reg_info_t *reg_info = lm_get_region_info(lm_ctx); - lm_pci_hdr_t *hdr; - - assert(lm_ctx != NULL); - - if (reg_info[bar_index].size == 0) { - return; - } - - hdr = &lm_get_pci_config_space(lm_ctx)->hdr; - - cfg_addr = *(uint32_t *) buf; - - lm_log(lm_ctx, LM_DBG, "BAR%d addr 0x%x\n", bar_index, cfg_addr); - - if (cfg_addr == 0xffffffff) { - cfg_addr = ~(reg_info[bar_index].size) + 1; - } - - if ((reg_info[bar_index].flags & LM_REG_FLAG_MEM)) { - mask = PCI_BASE_ADDRESS_MEM_MASK; - } else { - mask = PCI_BASE_ADDRESS_IO_MASK; - } - cfg_addr |= (hdr->bars[bar_index].raw & ~mask); - - hdr->bars[bar_index].raw = htole32(cfg_addr); -} - -#define BAR_INDEX(offset) ((offset - PCI_BASE_ADDRESS_0) >> 2) - -static int -handle_command_write(lm_ctx_t *ctx, lm_pci_config_space_t *pci, - const char *buf, size_t count) -{ - uint16_t v; - - assert(ctx != NULL); - - if (count != 2) { - lm_log(ctx, LM_ERR, "bad write command size %lu\n", count); - return -EINVAL; - } - - assert(pci != NULL); - assert(buf != NULL); - - v = *(uint16_t*)buf; - - if ((v & PCI_COMMAND_IO) == PCI_COMMAND_IO) { - if (!pci->hdr.cmd.iose) { - pci->hdr.cmd.iose = 0x1; - lm_log(ctx, LM_INF, "I/O space enabled\n"); - } - v &= ~PCI_COMMAND_IO; - } else { - if (pci->hdr.cmd.iose) { - pci->hdr.cmd.iose = 0x0; - lm_log(ctx, LM_INF, "I/O space disabled\n"); - } - } - - if ((v & PCI_COMMAND_MEMORY) == PCI_COMMAND_MEMORY) { - if (!pci->hdr.cmd.mse) { - pci->hdr.cmd.mse = 0x1; - lm_log(ctx, LM_INF, "memory space enabled\n"); - } - v &= ~PCI_COMMAND_MEMORY; - } else { - if (pci->hdr.cmd.mse) { - pci->hdr.cmd.mse = 0x0; - lm_log(ctx, LM_INF, "memory space disabled\n"); - } - } - - if ((v & PCI_COMMAND_MASTER) == PCI_COMMAND_MASTER) { - if (!pci->hdr.cmd.bme) { - pci->hdr.cmd.bme = 0x1; - lm_log(ctx, LM_INF, "bus master enabled\n"); - } - v &= ~PCI_COMMAND_MASTER; - } else { - if (pci->hdr.cmd.bme) { - pci->hdr.cmd.bme = 0x0; - lm_log(ctx, LM_INF, "bus master disabled\n"); - } - } - - if ((v & PCI_COMMAND_SERR) == PCI_COMMAND_SERR) { - if (!pci->hdr.cmd.see) { - pci->hdr.cmd.see = 0x1; - lm_log(ctx, LM_INF, "SERR# enabled\n"); - } - v &= ~PCI_COMMAND_SERR; - } else { - if (pci->hdr.cmd.see) { - pci->hdr.cmd.see = 0x0; - lm_log(ctx, LM_INF, "SERR# disabled\n"); - } - } - - if ((v & PCI_COMMAND_INTX_DISABLE) == PCI_COMMAND_INTX_DISABLE) { - if (!pci->hdr.cmd.id) { - pci->hdr.cmd.id = 0x1; - lm_log(ctx, LM_INF, "INTx emulation disabled\n"); - } - v &= ~PCI_COMMAND_INTX_DISABLE; - } 
else { - if (pci->hdr.cmd.id) { - pci->hdr.cmd.id = 0x0; - lm_log(ctx, LM_INF, "INTx emulation enabled\n"); - } - } - - if ((v & PCI_COMMAND_INVALIDATE) == PCI_COMMAND_INVALIDATE) { - if (!pci->hdr.cmd.mwie) { - pci->hdr.cmd.mwie = 1U; - lm_log(ctx, LM_INF, "memory write and invalidate enabled\n"); - } - v &= ~PCI_COMMAND_INVALIDATE; - } else { - if (pci->hdr.cmd.mwie) { - pci->hdr.cmd.mwie = 0; - lm_log(ctx, LM_INF, "memory write and invalidate disabled"); - } - } - - if ((v & PCI_COMMAND_VGA_PALETTE) == PCI_COMMAND_VGA_PALETTE) { - lm_log(ctx, LM_INF, "enabling VGA palette snooping ignored\n"); - v &= ~PCI_COMMAND_VGA_PALETTE; - } - - if (v != 0) { - lm_log(ctx, LM_ERR, "unconsumed command flags %x\n", v); - return -EINVAL; - } - - return 0; -} - -static int -handle_erom_write(lm_ctx_t *ctx, lm_pci_config_space_t *pci, - const char *buf, size_t count) -{ - uint32_t v; - - assert(ctx != NULL); - assert(pci != NULL); - - if (count != 0x4) { - lm_log(ctx, LM_ERR, "bad EROM count %lu\n", count); - return -EINVAL; - } - v = *(uint32_t*)buf; - - if (v == (uint32_t)PCI_ROM_ADDRESS_MASK) { - lm_log(ctx, LM_INF, "write mask to EROM ignored\n"); - } else if (v == 0) { - lm_log(ctx, LM_INF, "cleared EROM\n"); - pci->hdr.erom = 0; - } else if (v == (uint32_t)~PCI_ROM_ADDRESS_ENABLE) { - lm_log(ctx, LM_INF, "EROM disable ignored\n"); - } else { - lm_log(ctx, LM_ERR, "bad write to EROM 0x%x bytes\n", v); - return -EINVAL; - } - return 0; -} - -static inline int -muser_pci_hdr_write(lm_ctx_t *lm_ctx, uint16_t offset, - const char *buf, size_t count) -{ - lm_pci_config_space_t *pci; - int ret = 0; - - assert(lm_ctx != NULL); - assert(buf != NULL); - - pci = lm_get_pci_config_space(lm_ctx); - - switch (offset) { - case PCI_COMMAND: - ret = handle_command_write(lm_ctx, pci, buf, count); - break; - case PCI_STATUS: - lm_log(lm_ctx, LM_INF, "write to status ignored\n"); - break; - case PCI_INTERRUPT_PIN: - lm_log(lm_ctx, LM_ERR, "attempt to write read-only field IPIN\n"); - ret = -EINVAL; - break; - case PCI_INTERRUPT_LINE: - pci->hdr.intr.iline = buf[0]; - lm_log(lm_ctx, LM_DBG, "ILINE=%0x\n", pci->hdr.intr.iline); - break; - case PCI_LATENCY_TIMER: - pci->hdr.mlt = (uint8_t)buf[0]; - lm_log(lm_ctx, LM_INF, "set to latency timer to %hhx\n", pci->hdr.mlt); - break; - case PCI_BASE_ADDRESS_0: - case PCI_BASE_ADDRESS_1: - case PCI_BASE_ADDRESS_2: - case PCI_BASE_ADDRESS_3: - case PCI_BASE_ADDRESS_4: - case PCI_BASE_ADDRESS_5: - muser_pci_hdr_write_bar(lm_ctx, BAR_INDEX(offset), buf); - break; - case PCI_ROM_ADDRESS: - ret = handle_erom_write(lm_ctx, pci, buf, count); - break; - default: - lm_log(lm_ctx, LM_INF, "PCI config write %#x-%#lx not handled\n", - offset, offset + count); - ret = -EINVAL; - } - -#ifdef LM_VERBOSE_LOGGING - dump_buffer("PCI header", (char*)pci->hdr.raw, 0xff); -#endif - - return ret; -} - -/* - * @pci_hdr: the PCI header - * @reg_info: region info - * @rw: the command - * @write: whether this is a PCI header write - * @count: output parameter that receives the number of bytes read/written - */ -static inline int -muser_do_pci_hdr_access(lm_ctx_t *lm_ctx, uint32_t *count, - uint64_t *pos, bool is_write, - char *buf) -{ - uint32_t _count; - loff_t _pos; - int err = 0; - - assert(lm_ctx != NULL); - assert(count != NULL); - assert(pos != NULL); - assert(buf != NULL); - - _pos = *pos - region_to_offset(LM_PCI_DEV_CFG_REGION_IDX); - _count = MIN(*count, PCI_STD_HEADER_SIZEOF - _pos); - - if (is_write) { - err = muser_pci_hdr_write(lm_ctx, _pos, buf, _count); - } else { - memcpy(buf, 
lm_get_pci_config_space(lm_ctx)->hdr.raw + _pos, _count); - } - *pos += _count; - *count -= _count; - return err; -} - -static inline bool -muser_is_pci_hdr_access(uint64_t pos) -{ - const uint64_t off = region_to_offset(LM_PCI_DEV_CFG_REGION_IDX); - return pos >= off && pos - off < PCI_STD_HEADER_SIZEOF; -} - -/* FIXME this function is misleading, remove it */ -int -muser_pci_hdr_access(lm_ctx_t *lm_ctx, uint32_t *count, - uint64_t *pos, bool is_write, - char *buf) -{ - assert(lm_ctx != NULL); - assert(count != NULL); - assert(pos != NULL); - - if (!muser_is_pci_hdr_access(*pos)) { - return 0; - } - return muser_do_pci_hdr_access(lm_ctx, count, pos, is_write, buf); -} - -/* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lib/muser_priv.h b/lib/muser_priv.h deleted file mode 100644 index ea44237..0000000 --- a/lib/muser_priv.h +++ /dev/null @@ -1,217 +0,0 @@ -/* - * Copyright (c) 2019 Nutanix Inc. All rights reserved. - * - * Authors: Thanos Makatos - * Swapnil Ingle - * Felipe Franciosi - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of Nutanix nor the names of its contributors may be - * used to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH - * DAMAGE. - * - */ - -#ifndef LIB_MUSER_PRIV_H -#define LIB_MUSER_PRIV_H - -#include "dma.h" - -#ifdef LM_VERBOSE_LOGGING -void -dump_buffer(const char *prefix, const char *buf, uint32_t count); -#else -#define dump_buffer(prefix, buf, count) -#endif - -struct transport_ops { - int (*init)(lm_ctx_t*); - int (*attach)(lm_ctx_t*); - int(*detach)(lm_ctx_t*); - int (*get_request)(lm_ctx_t*, struct vfio_user_header*, int *fds, int *nr_fds); -}; - -typedef enum { - IRQ_NONE = 0, - IRQ_INTX, - IRQ_MSI, - IRQ_MSIX, -} irq_type_t; - -typedef struct { - irq_type_t type; /* irq type this device is using */ - int err_efd; /* eventfd for irq err */ - int req_efd; /* eventfd for irq req */ - uint32_t max_ivs; /* maximum number of ivs supported */ - int efds[0]; /* XXX must be last */ -} lm_irqs_t; - -struct migration; - -typedef struct { - - /* - * Region flags, see LM_REG_FLAG_XXX above. - */ - uint32_t flags; - - /* - * Size of the region. - */ - uint32_t size; - - /* - * Callback function that is called when the region is read or written. 
- * Note that the memory of the region is owned by the user, except for the - * standard header (first 64 bytes) of the PCI configuration space. - */ - lm_region_access_cb_t *fn; - - /* - * Callback function that is called when the region is memory mapped. - * Required if LM_REG_FLAG_MEM is set, otherwise ignored. - */ - lm_map_region_cb_t *map; - struct lm_sparse_mmap_areas *mmap_areas; /* sparse mmap areas */ -} lm_reg_info_t; - -struct lm_ctx { - void *pvt; - dma_controller_t *dma; - int fd; - int conn_fd; - lm_reset_cb_t *reset; - lm_log_lvl_t log_lvl; - lm_log_fn_t *log; - size_t nr_regions; - lm_reg_info_t *reg_info; - lm_pci_config_space_t *pci_config_space; - struct transport_ops *trans; - struct caps *caps; - uint64_t flags; - char *uuid; - lm_map_dma_cb_t *map_dma; - lm_unmap_dma_cb_t *unmap_dma; - - /* TODO there should be a void * variable to store transport-specific stuff */ - /* LM_TRANS_SOCK */ - int sock_flags; - - int client_max_fds; - - lm_reg_info_t *migr_reg; - struct migration *migration; - - uint32_t irq_count[LM_DEV_NUM_IRQS]; - lm_irqs_t *irqs; - int ready; -}; - -int -muser_pci_hdr_access(lm_ctx_t *lm_ctx, uint32_t *count, - uint64_t *pos, bool write, char *buf); - -lm_reg_info_t * -lm_get_region_info(lm_ctx_t *lm_ctx); - -uint64_t -region_to_offset(uint32_t region); - -/* FIXME copied from include/linux/stddef.h, is this OK license-wise? */ -#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER)) -#define offsetofend(TYPE, MEMBER) \ - (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER)) - -static inline ssize_t -get_minsz(unsigned int cmd) -{ - switch (cmd) { - case VFIO_DEVICE_GET_INFO: - return offsetofend(struct vfio_device_info, num_irqs); - case VFIO_DEVICE_GET_REGION_INFO: - return offsetofend(struct vfio_region_info, offset); - case VFIO_DEVICE_GET_IRQ_INFO: - return offsetofend(struct vfio_irq_info, count); - case VFIO_DEVICE_SET_IRQS: - return offsetofend(struct vfio_irq_set, count); - case VFIO_GROUP_GET_STATUS: - return offsetofend(struct vfio_group_status, flags); - case VFIO_GET_API_VERSION: - return 0; - case VFIO_CHECK_EXTENSION: - case VFIO_GROUP_SET_CONTAINER: - case VFIO_GROUP_UNSET_CONTAINER: - case VFIO_SET_IOMMU: - return sizeof(int); - case VFIO_IOMMU_GET_INFO: - return offsetofend(struct vfio_iommu_type1_info, iova_pgsizes); - case VFIO_IOMMU_MAP_DMA: - return offsetofend(struct vfio_iommu_type1_dma_map, size); - case VFIO_IOMMU_UNMAP_DMA: - return offsetofend(struct vfio_iommu_type1_dma_unmap, size); - case VFIO_GROUP_GET_DEVICE_FD: - case VFIO_DEVICE_RESET: - return 0; - } - return -EOPNOTSUPP; -} - -static inline const char * -vfio_cmd_to_str(int cmd) { - switch (cmd) { - case VFIO_GET_API_VERSION: return "VFIO_GET_API_VERSION"; - case VFIO_CHECK_EXTENSION: return "VFIO_CHECK_EXTENSION"; - case VFIO_SET_IOMMU: return "VFIO_SET_IOMMU"; - case VFIO_GROUP_GET_STATUS: return "VFIO_GROUP_GET_STATUS"; - case VFIO_GROUP_SET_CONTAINER: return "VFIO_GROUP_SET_CONTAINER"; - case VFIO_GROUP_UNSET_CONTAINER: return "VFIO_GROUP_UNSET_CONTAINER"; - case VFIO_GROUP_GET_DEVICE_FD: return "VFIO_GROUP_GET_DEVICE_FD"; - case VFIO_DEVICE_GET_INFO: return "VFIO_DEVICE_GET_INFO"; - case VFIO_DEVICE_GET_REGION_INFO: return "VFIO_DEVICE_GET_REGION_INFO"; - case VFIO_DEVICE_GET_IRQ_INFO: return "VFIO_DEVICE_GET_IRQ_INFO"; - case VFIO_DEVICE_SET_IRQS: return "VFIO_DEVICE_SET_IRQS"; - case VFIO_DEVICE_RESET: return "VFIO_DEVICE_RESET"; - case VFIO_IOMMU_GET_INFO: return 
"VFIO_IOMMU_GET_INFO/VFIO_DEVICE_GET_PCI_HOT_RESET_INFO/VFIO_IOMMU_SPAPR_TCE_GET_INFO"; - case VFIO_IOMMU_MAP_DMA: return "VFIO_IOMMU_MAP_DMA/VFIO_DEVICE_PCI_HOT_RESET"; - case VFIO_IOMMU_UNMAP_DMA: return "VFIO_IOMMU_UNMAP_DMA"; - case VFIO_IOMMU_ENABLE: return "VFIO_IOMMU_ENABLE"; - case VFIO_IOMMU_DISABLE: return "VFIO_IOMMU_DISABLE"; - case VFIO_EEH_PE_OP: return "VFIO_EEH_PE_OP"; - case VFIO_IOMMU_SPAPR_REGISTER_MEMORY: return "VFIO_IOMMU_SPAPR_REGISTER_MEMORY"; - case VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY: return "VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY"; - case VFIO_IOMMU_SPAPR_TCE_CREATE: return "VFIO_IOMMU_SPAPR_TCE_CREATE"; - case VFIO_IOMMU_SPAPR_TCE_REMOVE: return "VFIO_IOMMU_SPAPR_TCE_REMOVE"; - } - return NULL; -} - -int -handle_dma_map_or_unmap(lm_ctx_t *lm_ctx, uint32_t size, bool map, - int *fds, int nr_fds, - struct vfio_user_dma_region *dma_regions); - -void -_dma_controller_do_remove_region(dma_controller_t *dma, - dma_memory_region_t *region); - -#endif /* LIB_MUSER_PRIV_H */ - -/* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lib/pci.c b/lib/pci.c new file mode 100644 index 0000000..0a944e3 --- /dev/null +++ b/lib/pci.c @@ -0,0 +1,332 @@ +/* + * Copyright (c) 2019 Nutanix Inc. All rights reserved. + * + * Authors: Thanos Makatos + * Swapnil Ingle + * Felipe Franciosi + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Nutanix nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. 
+ * + */ + +#include +#include +#include +#include +#include + +#include +#include + +#include "common.h" +#include "libvfio-user.h" +#include "pci.h" +#include "private.h" + +static inline void +vfu_pci_hdr_write_bar(vfu_ctx_t *vfu_ctx, uint16_t bar_index, const char *buf) +{ + uint32_t cfg_addr; + unsigned long mask; + vfu_reg_info_t *reg_info = vfu_get_region_info(vfu_ctx); + vfu_pci_hdr_t *hdr; + + assert(vfu_ctx != NULL); + + if (reg_info[bar_index].size == 0) { + return; + } + + hdr = &vfu_pci_get_config_space(vfu_ctx)->hdr; + + cfg_addr = *(uint32_t *) buf; + + vfu_log(vfu_ctx, VFU_DBG, "BAR%d addr 0x%x\n", bar_index, cfg_addr); + + if (cfg_addr == 0xffffffff) { + cfg_addr = ~(reg_info[bar_index].size) + 1; + } + + if ((reg_info[bar_index].flags & VFU_REG_FLAG_MEM)) { + mask = PCI_BASE_ADDRESS_MEM_MASK; + } else { + mask = PCI_BASE_ADDRESS_IO_MASK; + } + cfg_addr |= (hdr->bars[bar_index].raw & ~mask); + + hdr->bars[bar_index].raw = htole32(cfg_addr); +} + +#define BAR_INDEX(offset) ((offset - PCI_BASE_ADDRESS_0) >> 2) + +static int +handle_command_write(vfu_ctx_t *ctx, vfu_pci_config_space_t *pci, + const char *buf, size_t count) +{ + uint16_t v; + + assert(ctx != NULL); + + if (count != 2) { + vfu_log(ctx, VFU_ERR, "bad write command size %lu\n", count); + return -EINVAL; + } + + assert(pci != NULL); + assert(buf != NULL); + + v = *(uint16_t*)buf; + + if ((v & PCI_COMMAND_IO) == PCI_COMMAND_IO) { + if (!pci->hdr.cmd.iose) { + pci->hdr.cmd.iose = 0x1; + vfu_log(ctx, VFU_INF, "I/O space enabled\n"); + } + v &= ~PCI_COMMAND_IO; + } else { + if (pci->hdr.cmd.iose) { + pci->hdr.cmd.iose = 0x0; + vfu_log(ctx, VFU_INF, "I/O space disabled\n"); + } + } + + if ((v & PCI_COMMAND_MEMORY) == PCI_COMMAND_MEMORY) { + if (!pci->hdr.cmd.mse) { + pci->hdr.cmd.mse = 0x1; + vfu_log(ctx, VFU_INF, "memory space enabled\n"); + } + v &= ~PCI_COMMAND_MEMORY; + } else { + if (pci->hdr.cmd.mse) { + pci->hdr.cmd.mse = 0x0; + vfu_log(ctx, VFU_INF, "memory space disabled\n"); + } + } + + if ((v & PCI_COMMAND_MASTER) == PCI_COMMAND_MASTER) { + if (!pci->hdr.cmd.bme) { + pci->hdr.cmd.bme = 0x1; + vfu_log(ctx, VFU_INF, "bus master enabled\n"); + } + v &= ~PCI_COMMAND_MASTER; + } else { + if (pci->hdr.cmd.bme) { + pci->hdr.cmd.bme = 0x0; + vfu_log(ctx, VFU_INF, "bus master disabled\n"); + } + } + + if ((v & PCI_COMMAND_SERR) == PCI_COMMAND_SERR) { + if (!pci->hdr.cmd.see) { + pci->hdr.cmd.see = 0x1; + vfu_log(ctx, VFU_INF, "SERR# enabled\n"); + } + v &= ~PCI_COMMAND_SERR; + } else { + if (pci->hdr.cmd.see) { + pci->hdr.cmd.see = 0x0; + vfu_log(ctx, VFU_INF, "SERR# disabled\n"); + } + } + + if ((v & PCI_COMMAND_INTX_DISABLE) == PCI_COMMAND_INTX_DISABLE) { + if (!pci->hdr.cmd.id) { + pci->hdr.cmd.id = 0x1; + vfu_log(ctx, VFU_INF, "INTx emulation disabled\n"); + } + v &= ~PCI_COMMAND_INTX_DISABLE; + } else { + if (pci->hdr.cmd.id) { + pci->hdr.cmd.id = 0x0; + vfu_log(ctx, VFU_INF, "INTx emulation enabled\n"); + } + } + + if ((v & PCI_COMMAND_INVALIDATE) == PCI_COMMAND_INVALIDATE) { + if (!pci->hdr.cmd.mwie) { + pci->hdr.cmd.mwie = 1U; + vfu_log(ctx, VFU_INF, "memory write and invalidate enabled\n"); + } + v &= ~PCI_COMMAND_INVALIDATE; + } else { + if (pci->hdr.cmd.mwie) { + pci->hdr.cmd.mwie = 0; + vfu_log(ctx, VFU_INF, "memory write and invalidate disabled"); + } + } + + if ((v & PCI_COMMAND_VGA_PALETTE) == PCI_COMMAND_VGA_PALETTE) { + vfu_log(ctx, VFU_INF, "enabling VGA palette snooping ignored\n"); + v &= ~PCI_COMMAND_VGA_PALETTE; + } + + if (v != 0) { + vfu_log(ctx, VFU_ERR, "unconsumed command flags %x\n", v); + 
return -EINVAL; + } + + return 0; +} + +static int +handle_erom_write(vfu_ctx_t *ctx, vfu_pci_config_space_t *pci, + const char *buf, size_t count) +{ + uint32_t v; + + assert(ctx != NULL); + assert(pci != NULL); + + if (count != 0x4) { + vfu_log(ctx, VFU_ERR, "bad EROM count %lu\n", count); + return -EINVAL; + } + v = *(uint32_t*)buf; + + if (v == (uint32_t)PCI_ROM_ADDRESS_MASK) { + vfu_log(ctx, VFU_INF, "write mask to EROM ignored\n"); + } else if (v == 0) { + vfu_log(ctx, VFU_INF, "cleared EROM\n"); + pci->hdr.erom = 0; + } else if (v == (uint32_t)~PCI_ROM_ADDRESS_ENABLE) { + vfu_log(ctx, VFU_INF, "EROM disable ignored\n"); + } else { + vfu_log(ctx, VFU_ERR, "bad write to EROM 0x%x bytes\n", v); + return -EINVAL; + } + return 0; +} + +static inline int +vfu_pci_hdr_write(vfu_ctx_t *vfu_ctx, uint16_t offset, + const char *buf, size_t count) +{ + vfu_pci_config_space_t *pci; + int ret = 0; + + assert(vfu_ctx != NULL); + assert(buf != NULL); + + pci = vfu_pci_get_config_space(vfu_ctx); + + switch (offset) { + case PCI_COMMAND: + ret = handle_command_write(vfu_ctx, pci, buf, count); + break; + case PCI_STATUS: + vfu_log(vfu_ctx, VFU_INF, "write to status ignored\n"); + break; + case PCI_INTERRUPT_PIN: + vfu_log(vfu_ctx, VFU_ERR, "attempt to write read-only field IPIN\n"); + ret = -EINVAL; + break; + case PCI_INTERRUPT_LINE: + pci->hdr.intr.iline = buf[0]; + vfu_log(vfu_ctx, VFU_DBG, "ILINE=%0x\n", pci->hdr.intr.iline); + break; + case PCI_LATENCY_TIMER: + pci->hdr.mlt = (uint8_t)buf[0]; + vfu_log(vfu_ctx, VFU_INF, "latency timer set to %hhx\n", pci->hdr.mlt); + break; + case PCI_BASE_ADDRESS_0: + case PCI_BASE_ADDRESS_1: + case PCI_BASE_ADDRESS_2: + case PCI_BASE_ADDRESS_3: + case PCI_BASE_ADDRESS_4: + case PCI_BASE_ADDRESS_5: + vfu_pci_hdr_write_bar(vfu_ctx, BAR_INDEX(offset), buf); + break; + case PCI_ROM_ADDRESS: + ret = handle_erom_write(vfu_ctx, pci, buf, count); + break; + default: + vfu_log(vfu_ctx, VFU_INF, "PCI config write %#x-%#lx not handled\n", + offset, offset + count); + ret = -EINVAL; + } + +#ifdef VFU_VERBOSE_LOGGING + dump_buffer("PCI header", (char*)pci->hdr.raw, 0xff); +#endif + + return ret; +} + +/* + * @pci_hdr: the PCI header + * @reg_info: region info + * @rw: the command + * @write: whether this is a PCI header write + * @count: output parameter that receives the number of bytes read/written + */ +static inline int +vfu_do_pci_hdr_access(vfu_ctx_t *vfu_ctx, uint32_t *count, + uint64_t *pos, bool is_write, + char *buf) +{ + uint32_t _count; + loff_t _pos; + int err = 0; + + assert(vfu_ctx != NULL); + assert(count != NULL); + assert(pos != NULL); + assert(buf != NULL); + + _pos = *pos - region_to_offset(VFU_PCI_DEV_CFG_REGION_IDX); + _count = MIN(*count, PCI_STD_HEADER_SIZEOF - _pos); + + if (is_write) { + err = vfu_pci_hdr_write(vfu_ctx, _pos, buf, _count); + } else { + memcpy(buf, vfu_pci_get_config_space(vfu_ctx)->hdr.raw + _pos, _count); + } + *pos += _count; + *count -= _count; + return err; +} + +static inline bool +vfu_is_pci_hdr_access(uint64_t pos) +{ + const uint64_t off = region_to_offset(VFU_PCI_DEV_CFG_REGION_IDX); + return pos >= off && pos - off < PCI_STD_HEADER_SIZEOF; +} + +/* FIXME this function is misleading, remove it */ +int +vfu_pci_hdr_access(vfu_ctx_t *vfu_ctx, uint32_t *count, + uint64_t *pos, bool is_write, char *buf) +{ + assert(vfu_ctx != NULL); + assert(count != NULL); + assert(pos != NULL); + + if (!vfu_is_pci_hdr_access(*pos)) { + return 0; + } + return vfu_do_pci_hdr_access(vfu_ctx, count, pos, is_write, buf); +} + +/* ex: set
tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lib/private.h b/lib/private.h new file mode 100644 index 0000000..9b13949 --- /dev/null +++ b/lib/private.h @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2019 Nutanix Inc. All rights reserved. + * + * Authors: Thanos Makatos + * Swapnil Ingle + * Felipe Franciosi + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Nutanix nor the names of its contributors may be + * used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + * + */ + +#ifndef LIB_VFIO_USER_PRIVATE_H +#define LIB_VFIO_USER_PRIVATE_H + +#include "dma.h" + +#ifdef VFU_VERBOSE_LOGGING +void +dump_buffer(const char *prefix, const char *buf, uint32_t count); +#else +#define dump_buffer(prefix, buf, count) +#endif + +struct transport_ops { + int (*init)(vfu_ctx_t*); + int (*attach)(vfu_ctx_t*); + int(*detach)(vfu_ctx_t*); + int (*get_request)(vfu_ctx_t*, struct vfio_user_header*, + int *fds, int *nr_fds); +}; + +typedef enum { + IRQ_NONE = 0, + IRQ_INTX, + IRQ_MSI, + IRQ_MSIX, +} irq_type_t; + +typedef struct { + irq_type_t type; /* irq type this device is using */ + int err_efd; /* eventfd for irq err */ + int req_efd; /* eventfd for irq req */ + uint32_t max_ivs; /* maximum number of ivs supported */ + int efds[0]; /* XXX must be last */ +} vfu_irqs_t; + +struct migration; + +typedef struct { + + /* + * Region flags, see VFU_REG_FLAG_XXX above. + */ + uint32_t flags; + + /* + * Size of the region. + */ + uint32_t size; + + /* + * Callback function that is called when the region is read or written. + * Note that the memory of the region is owned by the user, except for the + * standard header (first 64 bytes) of the PCI configuration space. + */ + vfu_region_access_cb_t *fn; + + /* + * Callback function that is called when the region is memory mapped. + * Required if VFU_REG_FLAG_MEM is set, otherwise ignored. 
+ */ + vfu_map_region_cb_t *map; + struct vfu_sparse_mmap_areas *mmap_areas; /* sparse mmap areas */ +} vfu_reg_info_t; + +struct vfu_ctx { + void *pvt; + dma_controller_t *dma; + int fd; + int conn_fd; + vfu_reset_cb_t *reset; + vfu_log_lvl_t log_lvl; + vfu_log_fn_t *log; + size_t nr_regions; + vfu_reg_info_t *reg_info; + vfu_pci_config_space_t *pci_config_space; + struct transport_ops *trans; + struct caps *caps; + uint64_t flags; + char *uuid; + vfu_map_dma_cb_t *map_dma; + vfu_unmap_dma_cb_t *unmap_dma; + + /* TODO there should be a void * variable to store transport-specific stuff */ + /* VFU_TRANS_SOCK */ + int sock_flags; + + int client_max_fds; + + vfu_reg_info_t *migr_reg; + struct migration *migration; + + uint32_t irq_count[VFU_DEV_NUM_IRQS]; + vfu_irqs_t *irqs; + int ready; +}; + +int +vfu_pci_hdr_access(vfu_ctx_t *vfu_ctx, uint32_t *count, + uint64_t *pos, bool write, char *buf); + +vfu_reg_info_t * +vfu_get_region_info(vfu_ctx_t *vfu_ctx); + +uint64_t +region_to_offset(uint32_t region); + +int +handle_dma_map_or_unmap(vfu_ctx_t *vfu_ctx, uint32_t size, bool map, + int *fds, int nr_fds, + struct vfio_user_dma_region *dma_regions); + +void +_dma_controller_do_remove_region(dma_controller_t *dma, + dma_memory_region_t *region); + +#endif /* LIB_VFIO_USER_PRIVATE_H */ + +/* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lib/python_bindings.c b/lib/python_bindings.c index b6caa9f..87abc78 100644 --- a/lib/python_bindings.c +++ b/lib/python_bindings.c @@ -32,9 +32,9 @@ #include -#include "muser.h" +#include "libvfio-user.h" -static PyObject *region_access_callbacks[LM_PCI_DEV_NUM_REGIONS]; +static PyObject *region_access_callbacks[VFU_PCI_DEV_NUM_REGIONS]; static int handle_read(char *dst, PyObject *result, int count) @@ -48,7 +48,7 @@ handle_read(char *dst, PyObject *result, int count) } /* - * Function callback called by libmuser. We then call the Python function. + * Function callback called by libvfio-user. We then call the Python function. * * FIXME need a way to provide private pointer. 
*/ @@ -107,7 +107,7 @@ REGION_WRAP(6) REGION_WRAP(7) REGION_WRAP(8) -static ssize_t (*region_access_wraps[LM_PCI_DEV_NUM_REGIONS])(void *, char *, size_t, +static ssize_t (*region_access_wraps[VFU_PCI_DEV_NUM_REGIONS])(void *, char *, size_t, loff_t, bool) = { r_0_wrap, r_1_wrap, @@ -129,7 +129,7 @@ struct _region_info { static const struct _region_info _0_ri; static PyObject *log_fn; -static lm_log_lvl_t log_lvl = LM_ERR; +static vfu_log_lvl_t log_lvl = VFU_ERR; static void _log_fn(void *pvt, const char *msg) @@ -146,16 +146,16 @@ _log_fn(void *pvt, const char *msg) } static PyObject * -libmuser_run(PyObject *self, PyObject *args, PyObject *kwargs) +libvfio_user_run(PyObject *self, PyObject *args, PyObject *kwargs) { static char *kwlist[] = {"vid", "did", "uuid", "log", "log_lvl", "bar0", "bar1", "bar2", "bar3", "bar4", "bar5", "rom", "cfg", "vga", "intx", "msi", "msix", "err", "req", NULL}; int err; - lm_dev_info_t dev_info = { 0 }; + vfu_dev_info_t dev_info = { 0 }; int i; - struct _region_info _ri[LM_PCI_DEV_NUM_REGIONS] = { 0 }; + struct _region_info _ri[VFU_PCI_DEV_NUM_REGIONS] = { 0 }; if (!PyArg_ParseTupleAndKeywords( args, @@ -184,20 +184,20 @@ libmuser_run(PyObject *self, PyObject *args, PyObject *kwargs) return NULL; } - for (i = 0; i < LM_PCI_DEV_NUM_REGIONS; i++) { + for (i = 0; i < VFU_PCI_DEV_NUM_REGIONS; i++) { int j; uint32_t flags = 0; - if (i == LM_PCI_DEV_CFG_REGION_IDX && !memcmp(&_0_ri, &_ri[i], sizeof _0_ri)) { + if (i == VFU_PCI_DEV_CFG_REGION_IDX && !memcmp(&_0_ri, &_ri[i], sizeof _0_ri)) { continue; } if (_ri[i].perm != NULL) { for (j = 0; j < strlen(_ri[i].perm); j++) { if (_ri[i].perm[j] == 'r') { - flags |= LM_REG_FLAG_READ; + flags |= VFU_REG_FLAG_READ; } else if (_ri[i].perm[j] == 'w') { - flags |= LM_REG_FLAG_WRITE; + flags |= VFU_REG_FLAG_WRITE; } else { /* FIXME shouldn't print to stderr */ fprintf(stderr, "bad permission '%c'\n", _ri[i].perm[j]); @@ -219,13 +219,13 @@ libmuser_run(PyObject *self, PyObject *args, PyObject *kwargs) dev_info.log_lvl = log_lvl; } - err = lm_ctx_run(&dev_info); + err = vfu_ctx_run(&dev_info); return Py_BuildValue("i", err); } -static PyMethodDef LibmuserMethods[] = { +static PyMethodDef LibVfioUserMethods[] = { { "run", - (PyCFunction)libmuser_run, + (PyCFunction)libvfio_user_run, METH_VARARGS | METH_KEYWORDS, "runs a device" }, @@ -233,9 +233,9 @@ static PyMethodDef LibmuserMethods[] = { }; PyMODINIT_FUNC -initmuser(void) +initvfiouser(void) { - (void)Py_InitModule("muser", LibmuserMethods); + (void)Py_InitModule("vfio_user", LibVfioUserMethods); } /* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lib/setup.py b/lib/setup.py index 73bd93d..8b37343 100644 --- a/lib/setup.py +++ b/lib/setup.py @@ -29,10 +29,10 @@ from distutils.core import setup, Extension -module1 = Extension('muser', +module1 = Extension('vfio_user', sources = ['python_bindings.c'], #library_dirs=['/usr/local/lib'], - libraries=['muser'], + libraries=['vfio-user'], #extra_compile_args=['-g', '-O0'] ) diff --git a/lib/tran_sock.c b/lib/tran_sock.c index 3ad1222..ad57ccc 100644 --- a/lib/tran_sock.c +++ b/lib/tran_sock.c @@ -43,10 +43,10 @@ #include #include -#include "muser.h" -#include "muser_priv.h" -#include "tran_sock.h" +#include "libvfio-user.h" #include "migration.h" +#include "private.h" +#include "tran_sock.h" // FIXME: is this the value we want? 
#define SERVER_MAX_FDS 8 @@ -71,13 +71,13 @@ recv_blocking(int sock, void *buf, size_t len, int flags) } static int -init_sock(lm_ctx_t *lm_ctx) +init_sock(vfu_ctx_t *vfu_ctx) { struct sockaddr_un addr = { .sun_family = AF_UNIX }; int ret, unix_sock; mode_t mode; - assert(lm_ctx != NULL); + assert(vfu_ctx != NULL); /* FIXME SPDK can't easily run as non-root */ mode = umask(0000); @@ -87,19 +87,19 @@ init_sock(lm_ctx_t *lm_ctx) goto out; } - if (lm_ctx->flags & LM_FLAG_ATTACH_NB) { + if (vfu_ctx->flags & LIBVFIO_USER_FLAG_ATTACH_NB) { ret = fcntl(unix_sock, F_SETFL, fcntl(unix_sock, F_GETFL, 0) | O_NONBLOCK); if (ret < 0) { ret = -errno; goto out; } - lm_ctx->sock_flags = MSG_DONTWAIT | MSG_WAITALL; + vfu_ctx->sock_flags = MSG_DONTWAIT | MSG_WAITALL; } else { - lm_ctx->sock_flags = 0; + vfu_ctx->sock_flags = 0; } - ret = snprintf(addr.sun_path, sizeof addr.sun_path, "%s", lm_ctx->uuid); + ret = snprintf(addr.sun_path, sizeof addr.sun_path, "%s", vfu_ctx->uuid); if (ret >= (int)sizeof addr.sun_path) { ret = -ENAMETOOLONG; } @@ -131,10 +131,10 @@ out: } int -vfio_user_send_iovec(int sock, uint16_t msg_id, bool is_reply, - enum vfio_user_command cmd, - struct iovec *iovecs, size_t nr_iovecs, - int *fds, int count, int err) +vfu_send_iovec(int sock, uint16_t msg_id, bool is_reply, + enum vfio_user_command cmd, + struct iovec *iovecs, size_t nr_iovecs, + int *fds, int count, int err) { int ret; struct vfio_user_header hdr = {.msg_id = msg_id}; @@ -194,9 +194,9 @@ vfio_user_send_iovec(int sock, uint16_t msg_id, bool is_reply, } int -vfio_user_send(int sock, uint16_t msg_id, bool is_reply, - enum vfio_user_command cmd, - void *data, size_t data_len) +vfu_send(int sock, uint16_t msg_id, bool is_reply, + enum vfio_user_command cmd, + void *data, size_t data_len) { /* [0] is for the header. */ struct iovec iovecs[2] = { @@ -205,17 +205,16 @@ vfio_user_send(int sock, uint16_t msg_id, bool is_reply, .iov_len = data_len } }; - return vfio_user_send_iovec(sock, msg_id, is_reply, cmd, iovecs, - ARRAY_SIZE(iovecs), NULL, 0, 0); + return vfu_send_iovec(sock, msg_id, is_reply, cmd, iovecs, + ARRAY_SIZE(iovecs), NULL, 0, 0); } int -send_vfio_user_error(int sock, uint16_t msg_id, +vfu_send_error(int sock, uint16_t msg_id, enum vfio_user_command cmd, int error) { - return vfio_user_send_iovec(sock, msg_id, true, cmd, - NULL, 0, NULL, 0, error); + return vfu_send_iovec(sock, msg_id, true, cmd, NULL, 0, NULL, 0, error); } /* @@ -228,8 +227,8 @@ send_vfio_user_error(int sock, uint16_t msg_id, * better. */ int -vfio_user_recv(int sock, struct vfio_user_header *hdr, bool is_reply, - uint16_t *msg_id, void *data, size_t *len) +vfu_recv(int sock, struct vfio_user_header *hdr, bool is_reply, + uint16_t *msg_id, void *data, size_t *len) { int ret; @@ -280,19 +279,19 @@ vfio_user_recv(int sock, struct vfio_user_header *hdr, bool is_reply, } /* - * Like vfio_user_recv(), but will automatically allocate reply data. + * Like vfu_recv(), but will automatically allocate reply data. * * FIXME: this does an unconstrained alloc of client-supplied data. 
*/ int -vfio_user_recv_alloc(int sock, struct vfio_user_header *hdr, bool is_reply, - uint16_t *msg_id, void **datap, size_t *lenp) +vfu_recv_alloc(int sock, struct vfio_user_header *hdr, bool is_reply, + uint16_t *msg_id, void **datap, size_t *lenp) { void *data; size_t len; int ret; - ret = vfio_user_recv(sock, hdr, is_reply, msg_id, NULL, NULL); + ret = vfu_recv(sock, hdr, is_reply, msg_id, NULL, NULL); if (ret != 0) { return ret; @@ -335,28 +334,28 @@ vfio_user_recv_alloc(int sock, struct vfio_user_header *hdr, bool is_reply, * messages. */ int -vfio_user_msg_iovec(int sock, uint16_t msg_id, enum vfio_user_command cmd, - struct iovec *iovecs, size_t nr_iovecs, - int *send_fds, size_t fd_count, - struct vfio_user_header *hdr, - void *recv_data, size_t recv_len) +vfu_msg_iovec(int sock, uint16_t msg_id, enum vfio_user_command cmd, + struct iovec *iovecs, size_t nr_iovecs, + int *send_fds, size_t fd_count, + struct vfio_user_header *hdr, + void *recv_data, size_t recv_len) { - int ret = vfio_user_send_iovec(sock, msg_id, false, cmd, iovecs, nr_iovecs, - send_fds, fd_count, 0); + int ret = vfu_send_iovec(sock, msg_id, false, cmd, iovecs, nr_iovecs, + send_fds, fd_count, 0); if (ret < 0) { return ret; } if (hdr == NULL) { hdr = alloca(sizeof *hdr); } - return vfio_user_recv(sock, hdr, true, &msg_id, recv_data, &recv_len); + return vfu_recv(sock, hdr, true, &msg_id, recv_data, &recv_len); } int -vfio_user_msg(int sock, uint16_t msg_id, enum vfio_user_command cmd, - void *send_data, size_t send_len, - struct vfio_user_header *hdr, - void *recv_data, size_t recv_len) +vfu_msg(int sock, uint16_t msg_id, enum vfio_user_command cmd, + void *send_data, size_t send_len, + struct vfio_user_header *hdr, + void *recv_data, size_t recv_len) { /* [0] is for the header. */ struct iovec iovecs[2] = { @@ -365,8 +364,8 @@ vfio_user_msg(int sock, uint16_t msg_id, enum vfio_user_command cmd, .iov_len = send_len } }; - return vfio_user_msg_iovec(sock, msg_id, cmd, iovecs, ARRAY_SIZE(iovecs), - NULL, 0, hdr, recv_data, recv_len); + return vfu_msg_iovec(sock, msg_id, cmd, iovecs, ARRAY_SIZE(iovecs), + NULL, 0, hdr, recv_data, recv_len); } /* @@ -385,8 +384,8 @@ vfio_user_msg(int sock, uint16_t msg_id, enum vfio_user_command cmd, * available in newer library versions, so we don't use it. 
*/ int -vfio_user_parse_version_json(const char *json_str, - int *client_max_fdsp, size_t *pgsizep) +vfu_parse_version_json(const char *json_str, + int *client_max_fdsp, size_t *pgsizep) { struct json_object *jo_caps = NULL; struct json_object *jo_top = NULL; @@ -449,7 +448,7 @@ out: } int -recv_version(lm_ctx_t *lm_ctx, int sock, uint16_t *msg_idp, +recv_version(vfu_ctx_t *vfu_ctx, int sock, uint16_t *msg_idp, struct vfio_user_version **versionp) { struct vfio_user_version *cversion = NULL; @@ -459,36 +458,37 @@ recv_version(lm_ctx_t *lm_ctx, int sock, uint16_t *msg_idp, *versionp = NULL; - ret = vfio_user_recv_alloc(sock, &hdr, false, msg_idp, - (void **)&cversion, &vlen); + ret = vfu_recv_alloc(sock, &hdr, false, msg_idp, + (void **)&cversion, &vlen); if (ret < 0) { - lm_log(lm_ctx, LM_ERR, "failed to receive version: %s", strerror(-ret)); + vfu_log(vfu_ctx, VFU_ERR, "failed to receive version: %s", + strerror(-ret)); return ret; } if (hdr.cmd != VFIO_USER_VERSION) { - lm_log(lm_ctx, LM_ERR, "msg%hx: invalid cmd %hu (expected %hu)", - *msg_idp, hdr.cmd, VFIO_USER_VERSION); + vfu_log(vfu_ctx, VFU_ERR, "msg%hx: invalid cmd %hu (expected %hu)", + *msg_idp, hdr.cmd, VFIO_USER_VERSION); ret = -EINVAL; goto out; } if (vlen < sizeof (*cversion)) { - lm_log(lm_ctx, LM_ERR, "msg%hx (VFIO_USER_VERSION): invalid size %lu", - *msg_idp, vlen); + vfu_log(vfu_ctx, VFU_ERR, + "msg%hx (VFIO_USER_VERSION): invalid size %lu", *msg_idp, vlen); ret = -EINVAL; goto out; } - if (cversion->major != LIB_MUSER_VFIO_USER_VERS_MJ) { - lm_log(lm_ctx, LM_ERR, "unsupported client major %hu (must be %hu)", - cversion->major, LIB_MUSER_VFIO_USER_VERS_MJ); + if (cversion->major != LIB_VFIO_USER_MAJOR) { + vfu_log(vfu_ctx, VFU_ERR, "unsupported client major %hu (must be %hu)", + cversion->major, LIB_VFIO_USER_MAJOR); ret = -ENOTSUP; goto out; } - lm_ctx->client_max_fds = 1; + vfu_ctx->client_max_fds = 1; if (vlen > sizeof (*cversion)) { const char *json_str = (const char *)cversion->data; @@ -496,40 +496,40 @@ recv_version(lm_ctx_t *lm_ctx, int sock, uint16_t *msg_idp, size_t pgsize = 0; if (json_str[len - 1] != '\0') { - lm_log(lm_ctx, LM_ERR, "ignoring invalid JSON from client"); + vfu_log(vfu_ctx, VFU_ERR, "ignoring invalid JSON from client"); ret = -EINVAL; goto out; } - ret = vfio_user_parse_version_json(json_str, &lm_ctx->client_max_fds, - &pgsize); + ret = vfu_parse_version_json(json_str, &vfu_ctx->client_max_fds, + &pgsize); if (ret < 0) { /* No client-supplied strings in the log for release build. */ #ifdef DEBUG - lm_log(lm_ctx, LM_ERR, "failed to parse client JSON \"%s\"", - json_str); + vfu_log(vfu_ctx, VFU_ERR, "failed to parse client JSON \"%s\"", + json_str); #else - lm_log(lm_ctx, LM_ERR, "failed to parse client JSON"); + vfu_log(vfu_ctx, VFU_ERR, "failed to parse client JSON"); #endif goto out; } - if (lm_ctx->migration != NULL && pgsize != 0) { - ret = migration_set_pgsize(lm_ctx->migration, pgsize); + if (vfu_ctx->migration != NULL && pgsize != 0) { + ret = migration_set_pgsize(vfu_ctx->migration, pgsize); if (ret != 0) { - lm_log(lm_ctx, LM_ERR, "refusing client page size of %zu", - pgsize); + vfu_log(vfu_ctx, VFU_ERR, "refusing client page size of %zu", + pgsize); goto out; } } // FIXME: is the code resilient against ->client_max_fds == 0? 
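recv_version() above hands the client's JSON capability string to this parse routine. A plausible shape for that payload — the "capabilities" nesting and "pgsize" appear in this diff, but the "max_fds" key name is inferred from client_max_fdsp and SERVER_MAX_FDS, so treat it as an assumption:

    /* Assumed client capability JSON consumed during version negotiation;
     * the "max_fds" key name is inferred, not confirmed by this diff. */
    const char *example_client_caps =
        "{"
            "\"capabilities\":{"
                "\"max_fds\":8,"
                "\"migration\":{"
                    "\"pgsize\":4096"
                "}"
            "}"
        "}";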
- if (lm_ctx->client_max_fds < 0 || - lm_ctx->client_max_fds > MUSER_CLIENT_MAX_FDS_LIMIT) { - lm_log(lm_ctx, LM_ERR, "refusing client max_fds of %d", - lm_ctx->client_max_fds); + if (vfu_ctx->client_max_fds < 0 || + vfu_ctx->client_max_fds > VFIO_USER_CLIENT_MAX_FDS_LIMIT) { + vfu_log(vfu_ctx, VFU_ERR, "refusing client max_fds of %d", + vfu_ctx->client_max_fds); ret = -EINVAL; goto out; } @@ -538,7 +538,7 @@ recv_version(lm_ctx_t *lm_ctx, int sock, uint16_t *msg_idp, out: if (ret != 0) { // FIXME: spec, is it OK to just have the header? - (void) send_vfio_user_error(sock, *msg_idp, hdr.cmd, ret); + (void) vfu_send_error(sock, *msg_idp, hdr.cmd, ret); free(cversion); cversion = NULL; } @@ -548,7 +548,7 @@ out: } int -send_version(lm_ctx_t *lm_ctx, int sock, uint16_t msg_id, +send_version(vfu_ctx_t *vfu_ctx, int sock, uint16_t msg_id, struct vfio_user_version *cversion) { struct vfio_user_version sversion = { 0 }; @@ -556,7 +556,7 @@ send_version(lm_ctx_t *lm_ctx, int sock, uint16_t msg_id, char server_caps[1024]; int slen; - if (lm_ctx->migration == NULL) { + if (vfu_ctx->migration == NULL) { slen = snprintf(server_caps, sizeof (server_caps), "{" "\"capabilities\":{" @@ -572,13 +572,13 @@ send_version(lm_ctx_t *lm_ctx, int sock, uint16_t msg_id, "\"pgsize\":%zu" "}" "}" - "}", SERVER_MAX_FDS, migration_get_pgsize(lm_ctx->migration)); + "}", SERVER_MAX_FDS, migration_get_pgsize(vfu_ctx->migration)); } // FIXME: we should save the client minor here, and check that before trying // to send unsupported things. - sversion.major = LIB_MUSER_VFIO_USER_VERS_MJ; - sversion.minor = MIN(cversion->minor, LIB_MUSER_VFIO_USER_VERS_MN); + sversion.major = LIB_VFIO_USER_MAJOR; + sversion.minor = MIN(cversion->minor, LIB_VFIO_USER_MINOR); /* [0] is for the header. */ iovecs[1].iov_base = &sversion; @@ -587,70 +587,70 @@ send_version(lm_ctx_t *lm_ctx, int sock, uint16_t msg_id, /* Include the NUL. */ iovecs[2].iov_len = slen + 1; - return vfio_user_send_iovec(sock, msg_id, true, VFIO_USER_VERSION, - iovecs, ARRAY_SIZE(iovecs), NULL, 0, 0); + return vfu_send_iovec(sock, msg_id, true, VFIO_USER_VERSION, + iovecs, ARRAY_SIZE(iovecs), NULL, 0, 0); } static int -negotiate(lm_ctx_t *lm_ctx, int sock) +negotiate(vfu_ctx_t *vfu_ctx, int sock) { struct vfio_user_version *client_version = NULL; uint16_t msg_id = 1; int ret; - ret = recv_version(lm_ctx, sock, &msg_id, &client_version); + ret = recv_version(vfu_ctx, sock, &msg_id, &client_version); if (ret < 0) { - lm_log(lm_ctx, LM_ERR, "failed to recv version: %s", strerror(-ret)); + vfu_log(vfu_ctx, VFU_ERR, "failed to recv version: %s", strerror(-ret)); return ret; } - ret = send_version(lm_ctx, sock, msg_id, client_version); + ret = send_version(vfu_ctx, sock, msg_id, client_version); free(client_version); if (ret < 0) { - lm_log(lm_ctx, LM_ERR, "failed to send version: %s", strerror(-ret)); + vfu_log(vfu_ctx, VFU_ERR, "failed to send version: %s", strerror(-ret)); } return ret; } /** - * lm_ctx: libmuser context - * FIXME: this shouldn't be happening as part of lm_ctx_create(). + * vfu_ctx: libvfio-user context + * FIXME: this shouldn't be happening as part of vfu_ctx_create(). 
 */
 static int
-open_sock(lm_ctx_t *lm_ctx)
+open_sock(vfu_ctx_t *vfu_ctx)
 {
     int ret;
     int conn_fd;
 
-    assert(lm_ctx != NULL);
+    assert(vfu_ctx != NULL);
 
-    conn_fd = accept(lm_ctx->fd, NULL, NULL);
+    conn_fd = accept(vfu_ctx->fd, NULL, NULL);
     if (conn_fd == -1) {
         return conn_fd;
     }
 
-    ret = negotiate(lm_ctx, conn_fd);
+    ret = negotiate(vfu_ctx, conn_fd);
     if (ret < 0) {
         close(conn_fd);
         return ret;
     }
 
-    lm_ctx->conn_fd = conn_fd;
+    vfu_ctx->conn_fd = conn_fd;
     return conn_fd;
 }
 
 static int
-close_sock(lm_ctx_t *lm_ctx)
+close_sock(vfu_ctx_t *vfu_ctx)
 {
-    return close(lm_ctx->conn_fd);
+    return close(vfu_ctx->conn_fd);
 }
 
 static int
-get_request_sock(lm_ctx_t *lm_ctx, struct vfio_user_header *hdr,
+get_request_sock(vfu_ctx_t *vfu_ctx, struct vfio_user_header *hdr,
                  int *fds, int *nr_fds)
 {
     int ret;
@@ -668,7 +668,7 @@ get_request_sock(lm_ctx_t *lm_ctx, struct vfio_user_header *hdr,
      * faster (?). I tried that and get short reads, so we need to store the
      * partially received buffer somewhere and retry.
      */
-    ret = recvmsg(lm_ctx->conn_fd, &msg, lm_ctx->sock_flags);
+    ret = recvmsg(vfu_ctx->conn_fd, &msg, vfu_ctx->sock_flags);
     if (ret == -1) {
         return -errno;
     }
diff --git a/lib/tran_sock.h b/lib/tran_sock.h
index a4c9a17..b764e9b 100644
--- a/lib/tran_sock.h
+++ b/lib/tran_sock.h
@@ -30,21 +30,21 @@
  *
  */
 
-#ifndef LIB_MUSER_TRAN_SOCK_H
-#define LIB_MUSER_TRAN_SOCK_H
+#ifndef LIB_VFIO_USER_TRAN_SOCK_H
+#define LIB_VFIO_USER_TRAN_SOCK_H
 
-#include "muser.h"
+#include "libvfio-user.h"
 
 /*
  * These are not public routines, but for convenience, they are used by the
- * sample/test code as well as privately within libmuser.
+ * sample/test code as well as privately within libvfio-user.
  *
  * Note there is currently only one transport - talking over a UNIX socket.
  */
 
 /* The largest number of fd's we are prepared to receive. */
 // FIXME: value?
-#define MUSER_CLIENT_MAX_FDS_LIMIT (1024)
+#define VFIO_USER_CLIENT_MAX_FDS_LIMIT (1024)
 
 extern struct transport_ops sock_transport_ops;
 
@@ -53,35 +53,34 @@ extern struct transport_ops sock_transport_ops;
  * will not be set if not found in the JSON.
  */
 int
-vfio_user_parse_version_json(const char *json_str, int *client_max_fdsp,
-                             size_t *pgsizep);
+vfu_parse_version_json(const char *json_str, int *client_max_fdsp,
+                       size_t *pgsizep);
 
 /*
  * Send a message to the other end. The iovecs array should leave the first
  * entry empty, as it will be used for the header.
  */
 int
-vfio_user_send_iovec(int sock, uint16_t msg_id, bool is_reply,
-                     enum vfio_user_command cmd,
-                     struct iovec *iovecs, size_t nr_iovecs,
-                     int *fds, int count,
-                     int err);
+vfu_send_iovec(int sock, uint16_t msg_id, bool is_reply,
+               enum vfio_user_command cmd,
+               struct iovec *iovecs, size_t nr_iovecs,
+               int *fds, int count,
+               int err);
 
 /*
  * Send a message to the other end with the given data.
  */
 int
-vfio_user_send(int sock, uint16_t msg_id, bool is_reply,
-               enum vfio_user_command cmd,
-               void *data, size_t data_len);
+vfu_send(int sock, uint16_t msg_id, bool is_reply, enum vfio_user_command cmd,
+         void *data, size_t data_len);
 
 /*
  * Send an empty reply back to the other end with the given errno.
  */
 int
-vfio_user_send_error(int sock, uint16_t msg_id,
-                     enum vfio_user_command cmd,
-                     int error);
+vfu_send_error(int sock, uint16_t msg_id,
+               enum vfio_user_command cmd,
+               int error);
 
 /*
  * Receive a message from the other end, and place the data into the given
@@ -89,9 +88,8 @@ vfio_user_send_error(int sock, uint16_t msg_id,
  * size.
  */
 int
-vfio_user_recv(int sock, struct vfio_user_header *hdr,
-               bool is_reply, uint16_t *msg_id,
-               void *data, size_t *len);
+vfu_recv(int sock, struct vfio_user_header *hdr, bool is_reply,
+         uint16_t *msg_id, void *data, size_t *len);
 
 /*
  * Receive a message from the other end, but automatically allocate a buffer for
@@ -99,9 +97,8 @@ vfio_user_recv(int sock, struct vfio_user_header *hdr,
  * NULL.
  */
 int
-vfio_user_recv_alloc(int sock, struct vfio_user_header *hdr,
-                     bool is_reply, uint16_t *msg_id,
-                     void **datap, size_t *lenp);
+vfu_recv_alloc(int sock, struct vfio_user_header *hdr, bool is_reply,
+               uint16_t *msg_id, void **datap, size_t *lenp);
 
 /*
  * Send and receive a message to the other end, using iovecs for the send. The
@@ -112,24 +109,24 @@ vfio_user_recv_alloc(int sock, struct vfio_user_header *hdr,
  * the reply header if non-NULL.
  */
 int
-vfio_user_msg_iovec(int sock, uint16_t msg_id,
-                    enum vfio_user_command cmd,
-                    struct iovec *iovecs, size_t nr_iovecs,
-                    int *send_fds, size_t fd_count,
-                    struct vfio_user_header *hdr,
-                    void *recv_data, size_t recv_len);
+vfu_msg_iovec(int sock, uint16_t msg_id,
+              enum vfio_user_command cmd,
+              struct iovec *iovecs, size_t nr_iovecs,
+              int *send_fds, size_t fd_count,
+              struct vfio_user_header *hdr,
+              void *recv_data, size_t recv_len);
 
 /*
  * Send and receive a message to the other end. @hdr is filled with the reply
  * header if non-NULL.
  */
 int
-vfio_user_msg(int sock, uint16_t msg_id,
-              enum vfio_user_command cmd,
-              void *send_data, size_t send_len,
-              struct vfio_user_header *hdr,
-              void *recv_data, size_t recv_len);
+vfu_msg(int sock, uint16_t msg_id,
+        enum vfio_user_command cmd,
+        void *send_data, size_t send_len,
+        struct vfio_user_header *hdr,
+        void *recv_data, size_t recv_len);
 
-#endif /* LIB_MUSER_TRAN_SOCK_H */
+#endif /* LIB_VFIO_USER_TRAN_SOCK_H */
 
 /* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */
--
cgit v1.1
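
[Editor's note, appended after the patch trailer and not part of the commit:
below is a minimal client-side sketch of the version handshake that the
negotiate() hunk above expects, using the renamed helpers declared in
tran_sock.h; the header itself says these routines are used by the
sample/test code. It is illustrative only. It assumes struct
vfio_user_version carries the major/minor fields and optional trailing JSON
data seen in this diff, that a bare struct without JSON capabilities is
acceptable (recv_version() then leaves client_max_fds at its default of 1),
and that vfu_recv_alloc() with is_reply=true matches the server's reply.]

#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

#include "libvfio-user.h"
#include "tran_sock.h"

static int
client_negotiate(int sock)
{
    struct vfio_user_version cversion = {
        .major = LIB_VFIO_USER_MAJOR,
        .minor = LIB_VFIO_USER_MINOR,
    };
    struct vfio_user_version *sversion = NULL;
    struct vfio_user_header hdr;
    uint16_t msg_id = 1;
    size_t vlen = 0;
    int ret;

    /*
     * The server's recv_version() insists the first message on the socket
     * is VFIO_USER_VERSION, so send it before anything else.
     */
    ret = vfu_send(sock, msg_id, false, VFIO_USER_VERSION,
                   &cversion, sizeof (cversion));
    if (ret < 0) {
        return ret;
    }

    /*
     * send_version() replies with the server's version followed by a
     * NUL-terminated JSON capability string, so receive into an allocated
     * buffer rather than a fixed-size one.
     */
    ret = vfu_recv_alloc(sock, &hdr, true, &msg_id,
                         (void **)&sversion, &vlen);
    if (ret < 0) {
        return ret;
    }

    /* The server negotiates minor as MIN(client minor, LIB_VFIO_USER_MINOR). */
    ret = (sversion->major == LIB_VFIO_USER_MAJOR) ? 0 : -ENOTSUP;

    free(sversion);
    return ret;
}

[A real client would presumably go on to parse the JSON capability string in
sversion->data, which has the same shape vfu_parse_version_json() handles,
before issuing further commands.]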