From 190f85bf9c114bf7c981bb8908394368f84c0c04 Mon Sep 17 00:00:00 2001 From: William Henderson Date: Fri, 15 Sep 2023 16:07:01 +0100 Subject: adapt to VFIO live migration v2 (#782) This commit adapts the vfio-user protocol specification and the libvfio-user implementation to v2 of the VFIO live migration interface, as used in the kernel and QEMU. The differences between v1 and v2 are discussed in this email thread [1], and we slightly differ from upstream VFIO v2 in that instead of transferring data over a new FD, we use the existing UNIX socket with new commands VFIO_USER_MIG_DATA_READ/WRITE. We also don't yet use P2P states. The updated spec was submitted to qemu-devel [2]. [1] https://lore.kernel.org/all/20220130160826.32449-9-yishaih@nvidia.com/ [2] https://lore.kernel.org/all/20230718094150.110183-1-william.henderson@nutanix.com/ Signed-off-by: William Henderson --- lib/common.h | 41 +++- lib/dma.c | 242 +++++++++++++++++----- lib/dma.h | 1 + lib/libvfio-user.c | 506 +++++++++++++++++++++++++++------------------ lib/migration.c | 572 ++++++++++++++++++++------------------------------- lib/migration.h | 24 ++- lib/migration_priv.h | 86 +------- lib/private.h | 14 -- 8 files changed, 783 insertions(+), 703 deletions(-) (limited to 'lib') diff --git a/lib/common.h b/lib/common.h index 07a74a5..40b9b27 100644 --- a/lib/common.h +++ b/lib/common.h @@ -41,6 +41,7 @@ #include #include #include +#include #define UNUSED __attribute__((unused)) #define EXPORT __attribute__((visibility("default"))) @@ -62,6 +63,20 @@ typedef unsigned long long ull_t; +static inline int +ERROR_INT(int err) +{ + errno = err; + return -1; +} + +static inline void * +ERROR_PTR(int err) +{ + errno = err; + return NULL; +} + /* Saturating uint64_t addition. */ static inline uint64_t satadd_u64(uint64_t a, uint64_t b) @@ -73,11 +88,21 @@ satadd_u64(uint64_t a, uint64_t b) /* * The size, in bytes, of the bitmap that represents the given range with the * given page size. + * + * Returns -1 and sets errno if the given page size is invalid for the given + * range. 
*/ -static inline size_t -_get_bitmap_size(size_t size, size_t pgsize) +static inline ssize_t +get_bitmap_size(size_t region_size, size_t pgsize) { - size_t nr_pages = (size / pgsize) + (size % pgsize != 0); + if (pgsize == 0) { + return ERROR_INT(EINVAL); + } + if (region_size < pgsize) { + return ERROR_INT(EINVAL); + } + + size_t nr_pages = (region_size / pgsize) + (region_size % pgsize != 0); return ROUND_UP(nr_pages, sizeof(uint64_t) * CHAR_BIT) / CHAR_BIT; } @@ -107,6 +132,16 @@ close_safely(int *fd) errno = saved_errno; } +static inline void +iov_free(struct iovec *iov) +{ + if (iov->iov_base != NULL) { + free(iov->iov_base); + iov->iov_base = NULL; + } + iov->iov_len = 0; +} + #ifdef UNIT_TEST #define MOCK_DEFINE(f) \ diff --git a/lib/dma.c b/lib/dma.c index 9ca34d0..10e38ff 100644 --- a/lib/dma.c +++ b/lib/dma.c @@ -255,19 +255,6 @@ dma_map_region(dma_controller_t *dma, dma_memory_region_t *region) return 0; } -static ssize_t -get_bitmap_size(size_t region_size, size_t pgsize) -{ - if (pgsize == 0) { - return ERROR_INT(EINVAL); - } - if (region_size < pgsize) { - return ERROR_INT(EINVAL); - } - - return _get_bitmap_size(region_size, pgsize); -} - static int dirty_page_logging_start_on_region(dma_memory_region_t *region, size_t pgsize) { @@ -530,28 +517,173 @@ dma_controller_dirty_page_logging_stop(dma_controller_t *dma) #ifdef DEBUG static void log_dirty_bitmap(vfu_ctx_t *vfu_ctx, dma_memory_region_t *region, - char *bitmap, size_t size) + char *bitmap, size_t size, size_t pgsize) { size_t i; size_t count; for (i = 0, count = 0; i < size; i++) { count += __builtin_popcount((uint8_t)bitmap[i]); } - vfu_log(vfu_ctx, LOG_DEBUG, "dirty pages: get [%p, %p), %zu dirty pages", + vfu_log(vfu_ctx, LOG_DEBUG, + "dirty pages: get [%p, %p), %zu dirty pages of size %zu", region->info.iova.iov_base, iov_end(®ion->info.iova), - count); + count, pgsize); } #endif +static void +dirty_page_exchange(uint8_t *outp, uint8_t *bitmap) +{ + /* + * If no bits are dirty, avoid the atomic exchange. This is obviously + * racy, but it's OK: if we miss a dirty bit being set, we'll catch it + * the next time around. + * + * Otherwise, atomically exchange the dirty bits with zero: as we use + * atomic or in _dma_mark_dirty(), this cannot lose set bits - we might + * miss a bit being set after, but again, we'll catch that next time + * around. + */ + if (*bitmap == 0) { + *outp = 0; + } else { + uint8_t zero = 0; + __atomic_exchange(bitmap, &zero, outp, __ATOMIC_SEQ_CST); + } +} + +static void +dirty_page_get_same_pgsize(dma_memory_region_t *region, char *bitmap, + size_t bitmap_size) +{ + for (size_t i = 0; i < bitmap_size; i++) { + dirty_page_exchange((uint8_t *)&bitmap[i], ®ion->dirty_bitmap[i]); + } +} + +static void +dirty_page_get_extend(dma_memory_region_t *region, char *bitmap, + size_t server_bitmap_size, size_t server_pgsize, + size_t client_bitmap_size, size_t client_pgsize) +{ + /* + * The index of the bit in the client bitmap that we are currently + * considering. By keeping track of this separately to the for loop, we + * allow for one server bit to be repeated for multiple client bytes. + */ + uint8_t client_bit_idx = 0; + size_t server_byte_idx; + int server_bit_idx; + size_t factor = server_pgsize / client_pgsize; + + /* + * Iterate through the bytes of the server bitmap. 
+ */ + for (server_byte_idx = 0; server_byte_idx < server_bitmap_size; + server_byte_idx++) { + + if (client_bit_idx / CHAR_BIT >= client_bitmap_size) { + break; + } + + uint8_t out = 0; + + dirty_page_exchange(&out, ®ion->dirty_bitmap[server_byte_idx]); + + /* + * Iterate through the bits of the server byte, repeating bits to reach + * the desired page size. + */ + for (server_bit_idx = 0; server_bit_idx < CHAR_BIT; server_bit_idx++) { + uint8_t server_bit = (out >> server_bit_idx) & 1; + + /* + * Repeat `factor` times the bit at index `j` of `out`. + * + * OR the same bit from the server bitmap (`server_bit`) with + * `factor` bits in the client bitmap, from `client_bit_idx` to + * `end_client_bit_idx`. + */ + for (size_t end_client_bit_idx = client_bit_idx + factor; + client_bit_idx < end_client_bit_idx; + client_bit_idx++) { + + bitmap[client_bit_idx / CHAR_BIT] |= + server_bit << (client_bit_idx % CHAR_BIT); + } + } + } +} + +static void +dirty_page_get_combine(dma_memory_region_t *region, char *bitmap, + size_t server_bitmap_size, size_t server_pgsize, + size_t client_bitmap_size, size_t client_pgsize) +{ + /* + * The index of the bit in the client bitmap that we are currently + * considering. By keeping track of this separately to the for loop, we + * allow multiple bytes' worth of server bits to be OR'd together to + * calculate one client bit. + */ + uint8_t client_bit_idx = 0; + size_t server_byte_idx; + int server_bit_idx; + size_t factor = client_pgsize / server_pgsize; + + /* + * Iterate through the bytes of the server bitmap. + */ + for (server_byte_idx = 0; server_byte_idx < server_bitmap_size; + server_byte_idx++) { + + if (client_bit_idx / CHAR_BIT >= client_bitmap_size) { + break; + } + + uint8_t out = 0; + + dirty_page_exchange(&out, ®ion->dirty_bitmap[server_byte_idx]); + + /* + * Iterate through the bits of the server byte, combining bits to reach + * the desired page size. + */ + for (server_bit_idx = 0; server_bit_idx < CHAR_BIT; server_bit_idx++) { + uint8_t server_bit = (out >> server_bit_idx) & 1; + + /* + * OR `factor` bits of the server bitmap with the same bit at + * index `client_bit_idx` in the client bitmap. + */ + bitmap[client_bit_idx / CHAR_BIT] |= + server_bit << (client_bit_idx % CHAR_BIT); + + /* + * Only move onto the next bit in the client bitmap once we've + * OR'd `factor` bits. + */ + if (((server_byte_idx * CHAR_BIT) + server_bit_idx) % factor + == factor - 1) { + client_bit_idx++; + + if (client_bit_idx / CHAR_BIT >= client_bitmap_size) { + return; + } + } + } + } +} + int dma_controller_dirty_page_get(dma_controller_t *dma, vfu_dma_addr_t addr, - uint64_t len, size_t pgsize, size_t size, + uint64_t len, size_t client_pgsize, size_t size, char *bitmap) { dma_memory_region_t *region; - ssize_t bitmap_size; + ssize_t server_bitmap_size; + ssize_t client_bitmap_size; dma_sg_t sg; - size_t i; int ret; assert(dma != NULL); @@ -574,24 +706,40 @@ dma_controller_dirty_page_get(dma_controller_t *dma, vfu_dma_addr_t addr, return ERROR_INT(ENOTSUP); } - if (pgsize != dma->dirty_pgsize) { - vfu_log(dma->vfu_ctx, LOG_ERR, "bad page size %zu", pgsize); + /* + * If dirty page logging is not enabled, the requested page size is zero, + * or the requested page size is not a power of two, return an error. 
+ */ + if (dma->dirty_pgsize == 0) { + vfu_log(dma->vfu_ctx, LOG_ERR, "dirty page logging not enabled"); + return ERROR_INT(EINVAL); + } + if (client_pgsize == 0 || (client_pgsize & (client_pgsize - 1)) != 0) { + vfu_log(dma->vfu_ctx, LOG_ERR, "bad client page size %zu", + client_pgsize); return ERROR_INT(EINVAL); } - bitmap_size = get_bitmap_size(len, pgsize); - if (bitmap_size < 0) { - vfu_log(dma->vfu_ctx, LOG_ERR, "failed to get bitmap size"); - return bitmap_size; + server_bitmap_size = get_bitmap_size(len, dma->dirty_pgsize); + if (server_bitmap_size < 0) { + vfu_log(dma->vfu_ctx, LOG_ERR, "failed to get server bitmap size"); + return server_bitmap_size; + } + + client_bitmap_size = get_bitmap_size(len, client_pgsize); + if (client_bitmap_size < 0) { + vfu_log(dma->vfu_ctx, LOG_ERR, "bad client page size %zu", + client_pgsize); + return client_bitmap_size; } /* * They must be equal because this is how much data the client expects to * receive. */ - if (size != (size_t)bitmap_size) { - vfu_log(dma->vfu_ctx, LOG_ERR, "bad bitmap size %zu != %zu", size, - bitmap_size); + if (size != (size_t)client_bitmap_size) { + vfu_log(dma->vfu_ctx, LOG_ERR, "bad client bitmap size %zu != %zu", + size, client_bitmap_size); return ERROR_INT(EINVAL); } @@ -602,31 +750,29 @@ dma_controller_dirty_page_get(dma_controller_t *dma, vfu_dma_addr_t addr, return ERROR_INT(EINVAL); } - for (i = 0; i < (size_t)bitmap_size; i++) { - uint8_t val = region->dirty_bitmap[i]; - uint8_t *outp = (uint8_t *)&bitmap[i]; - + if (client_pgsize == dma->dirty_pgsize) { + dirty_page_get_same_pgsize(region, bitmap, client_bitmap_size); + } else if (client_pgsize < dma->dirty_pgsize) { /* - * If no bits are dirty, avoid the atomic exchange. This is obviously - * racy, but it's OK: if we miss a dirty bit being set, we'll catch it - * the next time around. - * - * Otherwise, atomically exchange the dirty bits with zero: as we use - * atomic or in _dma_mark_dirty(), this cannot lose set bits - we might - * miss a bit being set after, but again, we'll catch that next time - * around. + * If the requested page size is less than that used for logging by + * the server, the bitmap will need to be extended, repeating bits. */ - if (val == 0) { - *outp = 0; - } else { - uint8_t zero = 0; - __atomic_exchange(®ion->dirty_bitmap[i], &zero, - outp, __ATOMIC_SEQ_CST); - } + dirty_page_get_extend(region, bitmap, server_bitmap_size, + dma->dirty_pgsize, client_bitmap_size, + client_pgsize); + } else { + /* + * If the requested page size is larger than that used for logging by + * the server, the bitmap will need to combine bits with OR, losing + * accuracy. + */ + dirty_page_get_combine(region, bitmap, server_bitmap_size, + dma->dirty_pgsize, client_bitmap_size, + client_pgsize); } #ifdef DEBUG - log_dirty_bitmap(dma->vfu_ctx, region, bitmap, size); + log_dirty_bitmap(dma->vfu_ctx, region, bitmap, size, client_pgsize); #endif return 0; diff --git a/lib/dma.h b/lib/dma.h index 9687f49..789904f 100644 --- a/lib/dma.h +++ b/lib/dma.h @@ -386,6 +386,7 @@ int dma_controller_dirty_page_get(dma_controller_t *dma, vfu_dma_addr_t addr, uint64_t len, size_t pgsize, size_t size, char *bitmap); + bool dma_sg_is_mappable(const dma_controller_t *dma, const dma_sg_t *sg); diff --git a/lib/libvfio-user.c b/lib/libvfio-user.c index 271a269..81b0010 100644 --- a/lib/libvfio-user.c +++ b/lib/libvfio-user.c @@ -83,21 +83,16 @@ vfu_log(vfu_ctx_t *vfu_ctx, int level, const char *fmt, ...) 
} static size_t -get_vfio_caps_size(bool is_migr_reg, vfu_reg_info_t *reg) +get_vfio_caps_size(vfu_reg_info_t *reg) { - size_t type_size = 0; size_t sparse_size = 0; - if (is_migr_reg) { - type_size = sizeof(struct vfio_region_info_cap_type); - } - if (reg->nr_mmap_areas != 0) { sparse_size = sizeof(struct vfio_region_info_cap_sparse_mmap) + (reg->nr_mmap_areas * sizeof(struct vfio_region_sparse_mmap_area)); } - return type_size + sparse_size; + return sparse_size; } /* @@ -106,7 +101,7 @@ get_vfio_caps_size(bool is_migr_reg, vfu_reg_info_t *reg) * points accordingly. */ static int -dev_get_caps(vfu_ctx_t *vfu_ctx, vfu_reg_info_t *vfu_reg, bool is_migr_reg, +dev_get_caps(vfu_ctx_t *vfu_ctx, vfu_reg_info_t *vfu_reg, struct vfio_region_info *vfio_reg, int **fds, size_t *nr_fds) { struct vfio_info_cap_header *header; @@ -120,16 +115,6 @@ dev_get_caps(vfu_ctx_t *vfu_ctx, vfu_reg_info_t *vfu_reg, bool is_migr_reg, header = (struct vfio_info_cap_header*)(vfio_reg + 1); - if (is_migr_reg) { - type = (struct vfio_region_info_cap_type *)header; - type->header.id = VFIO_REGION_INFO_CAP_TYPE; - type->header.version = 1; - type->header.next = 0; - type->type = VFIO_REGION_TYPE_MIGRATION; - type->subtype = VFIO_REGION_SUBTYPE_MIGRATION; - vfio_reg->cap_offset = sizeof(struct vfio_region_info); - } - if (vfu_reg->mmap_areas != NULL) { int i, nr_mmap_areas = vfu_reg->nr_mmap_areas; if (type != NULL) { @@ -218,14 +203,6 @@ region_access(vfu_ctx_t *vfu_ctx, size_t region, char *buf, if (ret == -1) { goto out; } - } else if (region == VFU_PCI_DEV_MIGR_REGION_IDX) { - if (vfu_ctx->migration == NULL) { - vfu_log(vfu_ctx, LOG_ERR, "migration not enabled"); - ret = ERROR_INT(EINVAL); - goto out; - } - - ret = migration_region_access(vfu_ctx, buf, count, offset, is_write); } else { vfu_region_access_cb_t *cb = vfu_ctx->reg_info[region].cb; @@ -293,8 +270,7 @@ is_valid_region_access(vfu_ctx_t *vfu_ctx, size_t size, uint16_t cmd, return false; } - if (unlikely(device_is_stopped_and_copying(vfu_ctx->migration) && - index != VFU_PCI_DEV_MIGR_REGION_IDX)) { + if (unlikely(device_is_stopped_and_copying(vfu_ctx->migration))) { vfu_log(vfu_ctx, LOG_ERR, "cannot access region %zu while device in stop-and-copy state", index); @@ -421,8 +397,7 @@ handle_device_get_region_info(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg) vfu_reg = &vfu_ctx->reg_info[in_info->index]; if (vfu_reg->size > 0) { - caps_size = get_vfio_caps_size(in_info->index == VFU_PCI_DEV_MIGR_REGION_IDX, - vfu_reg); + caps_size = get_vfio_caps_size(vfu_reg); } msg->out.iov.iov_len = MIN(sizeof(*out_info) + caps_size, in_info->argsz); @@ -457,9 +432,8 @@ handle_device_get_region_info(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg) /* Only actually provide the caps if they fit. 
*/ if (in_info->argsz >= out_info->argsz) { out_info->flags |= VFIO_REGION_INFO_FLAG_CAPS; - ret = dev_get_caps(vfu_ctx, vfu_reg, - in_info->index == VFU_PCI_DEV_MIGR_REGION_IDX, - out_info, &msg->out.fds, &msg->out.nr_fds); + ret = dev_get_caps(vfu_ctx, vfu_reg, out_info, &msg->out.fds, + &msg->out.nr_fds); if (ret < 0) { return ret; } @@ -917,133 +891,320 @@ static int device_reset(vfu_ctx_t *vfu_ctx, vfu_reset_type_t reason) { int ret; - + ret = call_reset_cb(vfu_ctx, reason); if (ret < 0) { return ret; } if (vfu_ctx->migration != NULL) { - return handle_device_state(vfu_ctx, vfu_ctx->migration, - VFIO_DEVICE_STATE_V1_RUNNING, false); + migr_state_transition(vfu_ctx->migration, + VFIO_USER_DEVICE_STATE_RUNNING); } return 0; } -static int -handle_dirty_pages_get(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg) +static uint32_t +device_feature_flags_supported(vfu_ctx_t *vfu_ctx, uint32_t feature) { - struct vfio_user_dirty_pages *dirty_pages_in; - struct vfio_user_dirty_pages *dirty_pages_out; - struct vfio_user_bitmap_range *range_in; - struct vfio_user_bitmap_range *range_out; - size_t argsz; - int ret; + if (vfu_ctx->migration == NULL) { + /* + * All of the current features require migration. + */ + return 0; + } + switch (feature) { + case VFIO_DEVICE_FEATURE_MIGRATION: + case VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT: + return VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_PROBE; + case VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE: + return VFIO_DEVICE_FEATURE_GET + | VFIO_DEVICE_FEATURE_SET + | VFIO_DEVICE_FEATURE_PROBE; + case VFIO_DEVICE_FEATURE_DMA_LOGGING_START: + case VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP: + return VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_PROBE; + default: + return 0; + }; +} - dirty_pages_in = msg->in.iov.iov_base; +static bool +is_migration_feature(uint32_t feature) +{ + switch (feature) { + case VFIO_DEVICE_FEATURE_MIGRATION: + case VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE: + return true; + } - if (msg->in.iov.iov_len < sizeof(*dirty_pages_in) + sizeof(*range_in) || - dirty_pages_in->argsz > SERVER_MAX_DATA_XFER_SIZE || - dirty_pages_in->argsz < sizeof(*dirty_pages_out)) { - vfu_log(vfu_ctx, LOG_ERR, "invalid message size=%zu argsz=%u", - msg->in.iov.iov_len, dirty_pages_in->argsz); - return ERROR_INT(EINVAL); + return false; +} + +static bool +is_dma_feature(uint32_t feature) +{ + switch (feature) { + case VFIO_DEVICE_FEATURE_DMA_LOGGING_START: + case VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP: + case VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT: + return true; } - range_in = msg->in.iov.iov_base + sizeof(*dirty_pages_in); + return false; +} - /* - * range_in is client-controlled, but we only need to protect against - * overflow here: we'll take MIN() against a validated value next, and - * dma_controller_dirty_page_get() will validate the actual ->bitmap.size - * value later, anyway. +static int +handle_migration_device_feature_get(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg, + struct vfio_user_device_feature *req) +{ + /* + * All supported outgoing data is currently the same size as + * struct vfio_user_device_feature_migration. 
*/ - argsz = satadd_u64(sizeof(*dirty_pages_out) + sizeof(*range_out), - range_in->bitmap.size); + msg->out.iov.iov_len = sizeof(struct vfio_user_device_feature) + + sizeof(struct vfio_user_device_feature_migration); + + if (req->argsz < msg->out.iov.iov_len) { + iov_free(&msg->out.iov); + return ERROR_INT(EINVAL); + } + + msg->out.iov.iov_base = calloc(1, msg->out.iov.iov_len); - msg->out.iov.iov_len = MIN(dirty_pages_in->argsz, argsz); - msg->out.iov.iov_base = malloc(msg->out.iov.iov_len); if (msg->out.iov.iov_base == NULL) { - return -1; + return ERROR_INT(ENOMEM); } - dirty_pages_out = msg->out.iov.iov_base; - memcpy(dirty_pages_out, dirty_pages_in, sizeof(*dirty_pages_out)); - dirty_pages_out->argsz = argsz; - /* - * If the reply doesn't fit, reply with just the dirty pages header, giving - * the needed argsz. Typically this shouldn't happen, as the client knows - * the needed reply size and has already provided the correct bitmap size. - */ - if (dirty_pages_in->argsz >= argsz) { - void *bitmap_out = msg->out.iov.iov_base + sizeof(*dirty_pages_out) - + sizeof(*range_out); - range_out = msg->out.iov.iov_base + sizeof(*dirty_pages_out); - memcpy(range_out, range_in, sizeof(*range_out)); - ret = dma_controller_dirty_page_get(vfu_ctx->dma, - (vfu_dma_addr_t)(uintptr_t)range_in->iova, - range_in->size, - range_in->bitmap.pgsize, - range_in->bitmap.size, bitmap_out); - if (ret != 0) { - ret = errno; - vfu_log(vfu_ctx, LOG_WARNING, - "failed to get dirty bitmap from DMA controller: %m"); - free(msg->out.iov.iov_base); - msg->out.iov.iov_base = NULL; - msg->out.iov.iov_len = 0; - return ERROR_INT(ret); + memcpy(msg->out.iov.iov_base, msg->in.iov.iov_base, + sizeof(struct vfio_user_device_feature)); + + struct vfio_user_device_feature *res = msg->out.iov.iov_base; + res->argsz = msg->out.iov.iov_len; + + switch (req->flags & VFIO_DEVICE_FEATURE_MASK) { + case VFIO_DEVICE_FEATURE_MIGRATION: { + struct vfio_user_device_feature_migration *mig = + (void *)res->data; + // FIXME are these always supported? Can we consider to be + // "supported" if said support is just an empty callback? + // + // We don't need to return RUNNING or ERROR since they are + // always supported. 
+ mig->flags = VFIO_MIGRATION_STOP_COPY + | VFIO_MIGRATION_PRE_COPY; + return 0; } - } else { - vfu_log(vfu_ctx, LOG_ERR, - "dirty pages: get [%#llx, %#llx): buffer too small (%u < %zu)", - (ull_t)range_in->iova, (ull_t)range_in->iova + range_in->size, - dirty_pages_in->argsz, argsz); + + case VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE: { + struct vfio_user_device_feature_mig_state *state = + (void *)res->data; + state->device_state = migration_get_state(vfu_ctx); + return 0; + } + + default: + vfu_log(vfu_ctx, LOG_ERR, "invalid flags for migration GET (%d)", + req->flags); + return ERROR_INT(EINVAL); } +} - return 0; +static int +handle_migration_device_feature_set(vfu_ctx_t *vfu_ctx, uint32_t feature, + struct vfio_user_device_feature *res) +{ + assert(feature == VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE); + + struct vfio_user_device_feature_mig_state *state = (void *)res->data; + + return migration_set_state(vfu_ctx, state->device_state); } static int -handle_dirty_pages(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg) +handle_dma_device_feature_get(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg, + struct vfio_user_device_feature *req) { - struct vfio_user_dirty_pages *dirty_pages = msg->in.iov.iov_base; - int ret; + const size_t header_size = sizeof(struct vfio_user_device_feature) + + sizeof(struct vfio_user_device_feature_dma_logging_report); + + struct vfio_user_device_feature_dma_logging_report *rep = + (void *)req->data; + + dma_controller_t *dma = vfu_ctx->dma; + + if (dma == NULL) { + vfu_log(vfu_ctx, LOG_ERR, "DMA not enabled for DMA device feature"); + return ERROR_INT(EINVAL); + } + + ssize_t bitmap_size = get_bitmap_size(rep->length, rep->page_size); + if (bitmap_size < 0) { + return bitmap_size; + } + + msg->out.iov.iov_len = header_size + bitmap_size; + + if (req->argsz < msg->out.iov.iov_len) { + iov_free(&msg->out.iov); + return ERROR_INT(EINVAL); + } + + msg->out.iov.iov_base = calloc(1, msg->out.iov.iov_len); + + if (msg->out.iov.iov_base == NULL) { + return ERROR_INT(ENOMEM); + } + + memcpy(msg->out.iov.iov_base, msg->in.iov.iov_base, header_size); + + struct vfio_user_device_feature *res = msg->out.iov.iov_base; + + res->argsz = msg->out.iov.iov_len; + char *bitmap = (char *)msg->out.iov.iov_base + header_size; + + int ret = dma_controller_dirty_page_get(dma, + (vfu_dma_addr_t) rep->iova, + rep->length, + rep->page_size, + bitmap_size, + bitmap); + + if (ret < 0) { + iov_free(&msg->out.iov); + } + + return ret; +} + +static int +handle_dma_device_feature_set(vfu_ctx_t *vfu_ctx, uint32_t feature, + struct vfio_user_device_feature *res) +{ + dma_controller_t *dma = vfu_ctx->dma; + + assert(dma != NULL); + + if (feature == VFIO_DEVICE_FEATURE_DMA_LOGGING_START) { + struct vfio_user_device_feature_dma_logging_control *ctl = + (void *)res->data; + return dma_controller_dirty_page_logging_start(dma, + ctl->page_size); + } + + assert(feature == VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP); + + dma_controller_dirty_page_logging_stop(dma); + return 0; +} + +static int +handle_device_feature(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg) +{ assert(vfu_ctx != NULL); assert(msg != NULL); - if (msg->in.iov.iov_len < sizeof(*dirty_pages) || - dirty_pages->argsz < sizeof(*dirty_pages)) { - vfu_log(vfu_ctx, LOG_ERR, "invalid message size %zu", msg->in.iov.iov_len); + if (msg->in.iov.iov_len < sizeof(struct vfio_user_device_feature)) { + vfu_log(vfu_ctx, LOG_ERR, "message too short (%ld)", + msg->in.iov.iov_len); return ERROR_INT(EINVAL); } - if (vfu_ctx->migration == NULL) { - vfu_log(vfu_ctx, LOG_ERR, "migration not configured"); - 
return ERROR_INT(ENOTSUP); + struct vfio_user_device_feature *req = msg->in.iov.iov_base; + + uint32_t operations = req->flags & ~VFIO_DEVICE_FEATURE_MASK; + uint32_t feature = req->flags & VFIO_DEVICE_FEATURE_MASK; + + uint32_t supported_ops = device_feature_flags_supported(vfu_ctx, feature); + + if ((req->flags & supported_ops) != operations || supported_ops == 0) { + vfu_log(vfu_ctx, LOG_ERR, "unsupported operation(s), flags=%d", + req->flags); + return ERROR_INT(EINVAL); } - switch (dirty_pages->flags) { - case VFIO_IOMMU_DIRTY_PAGES_FLAG_START: - ret = dma_controller_dirty_page_logging_start(vfu_ctx->dma, - migration_get_pgsize(vfu_ctx->migration)); - break; + ssize_t ret; - case VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP: - dma_controller_dirty_page_logging_stop(vfu_ctx->dma); - ret = 0; - break; + switch (operations) { + case VFIO_DEVICE_FEATURE_GET: { + if (is_migration_feature(feature)) { + ret = handle_migration_device_feature_get(vfu_ctx, msg, req); + } else if (is_dma_feature(feature)) { + ret = handle_dma_device_feature_get(vfu_ctx, msg, req); + } else { + vfu_log(vfu_ctx, LOG_ERR, "unsupported feature %d for GET", + feature); + return ERROR_INT(EINVAL); + } + break; + } - case VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP: - ret = handle_dirty_pages_get(vfu_ctx, msg); - break; + case VFIO_DEVICE_FEATURE_SET: { + msg->out.iov.iov_len = msg->in.iov.iov_len; - default: - vfu_log(vfu_ctx, LOG_ERR, "bad flags %#x", dirty_pages->flags); - ret = ERROR_INT(EINVAL); - break; + if (req->argsz < msg->out.iov.iov_len) { + vfu_log(vfu_ctx, LOG_ERR, "bad argsz (%d<%ld)", req->argsz, + msg->out.iov.iov_len); + iov_free(&msg->out.iov); + return ERROR_INT(EINVAL); + } + + msg->out.iov.iov_base = malloc(msg->out.iov.iov_len); + + if (msg->out.iov.iov_base == NULL) { + return ERROR_INT(ENOMEM); + } + + memcpy(msg->out.iov.iov_base, msg->in.iov.iov_base, + msg->out.iov.iov_len); + + struct vfio_user_device_feature *res = msg->out.iov.iov_base; + + if (is_migration_feature(feature)) { + ret = handle_migration_device_feature_set(vfu_ctx, feature, res); + } else if (is_dma_feature(feature)) { + ret = handle_dma_device_feature_set(vfu_ctx, feature, res); + } else { + vfu_log(vfu_ctx, LOG_ERR, "unsupported feature %d for SET", + feature); + return ERROR_INT(EINVAL); + } + break; + } + + default: { + /* + * PROBE allows GET/SET to also be set (to specify which operations + * we want to probe the feature for), so we only check that PROBE + * is set, not that it is the only operation flag set. 
+ */ + if (!(operations & VFIO_DEVICE_FEATURE_PROBE)) { + vfu_log(vfu_ctx, LOG_ERR, "no operation specified"); + return ERROR_INT(EINVAL); + } + + msg->out.iov.iov_len = msg->in.iov.iov_len; + + if (req->argsz < msg->out.iov.iov_len) { + vfu_log(vfu_ctx, LOG_ERR, "bad argsz (%d<%ld)", req->argsz, + msg->out.iov.iov_len); + iov_free(&msg->out.iov); + return ERROR_INT(EINVAL); + } + + msg->out.iov.iov_base = malloc(msg->out.iov.iov_len); + + if (msg->out.iov.iov_base == NULL) { + return ERROR_INT(ENOMEM); + } + + memcpy(msg->out.iov.iov_base, msg->in.iov.iov_base, + msg->out.iov.iov_len); + + ret = 0; + } } return ret; @@ -1207,13 +1368,16 @@ handle_request(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg) ret = device_reset(vfu_ctx, VFU_RESET_DEVICE); break; - case VFIO_USER_DIRTY_PAGES: - // FIXME: don't allow migration calls if migration == NULL - if (vfu_ctx->dma != NULL) { - ret = handle_dirty_pages(vfu_ctx, msg); - } else { - ret = 0; - } + case VFIO_USER_DEVICE_FEATURE: + ret = handle_device_feature(vfu_ctx, msg); + break; + + case VFIO_USER_MIG_DATA_READ: + ret = handle_mig_data_read(vfu_ctx, msg); + break; + + case VFIO_USER_MIG_DATA_WRITE: + ret = handle_mig_data_write(vfu_ctx, msg); break; default: @@ -1317,7 +1481,8 @@ MOCK_DEFINE(cmd_allowed_when_stopped_and_copying)(uint16_t cmd) { return cmd == VFIO_USER_REGION_READ || cmd == VFIO_USER_REGION_WRITE || - cmd == VFIO_USER_DIRTY_PAGES; + cmd == VFIO_USER_DEVICE_FEATURE || + cmd == VFIO_USER_MIG_DATA_READ; } bool @@ -1343,14 +1508,14 @@ static bool access_needs_quiesce(const vfu_ctx_t *vfu_ctx, size_t region_index, uint64_t offset) { - return access_migration_needs_quiesce(vfu_ctx, region_index, offset) - || access_is_pci_cap_exp(vfu_ctx, region_index, offset); + return access_is_pci_cap_exp(vfu_ctx, region_index, offset); } static bool command_needs_quiesce(vfu_ctx_t *vfu_ctx, const vfu_msg_t *msg) { struct vfio_user_region_access *reg; + struct vfio_user_device_feature *feature; if (vfu_ctx->quiesce == NULL) { return false; @@ -1364,22 +1529,11 @@ command_needs_quiesce(vfu_ctx_t *vfu_ctx, const vfu_msg_t *msg) case VFIO_USER_DEVICE_RESET: return true; - case VFIO_USER_DIRTY_PAGES: { - struct vfio_user_dirty_pages *dirty_pages = msg->in.iov.iov_base; - - if (msg->in.iov.iov_len < sizeof(*dirty_pages)) { - return false; - } - - return !(dirty_pages->flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP); - } - case VFIO_USER_REGION_WRITE: if (msg->in.iov.iov_len < sizeof(*reg)) { /* * bad request, it will be eventually failed by * handle_region_access - * */ return false; } @@ -1388,8 +1542,23 @@ command_needs_quiesce(vfu_ctx_t *vfu_ctx, const vfu_msg_t *msg) return true; } break; + + case VFIO_USER_DEVICE_FEATURE: + if (msg->in.iov.iov_len < sizeof(*feature)) { + /* + * bad request, it will be eventually failed by + * handle_region_access + */ + return false; + } + feature = msg->in.iov.iov_base; + if (migration_feature_needs_quiesce(feature)) { + return true; + } + break; } + return false; } @@ -1842,38 +2011,6 @@ copyin_mmap_areas(vfu_reg_info_t *reg_info, return 0; } -static bool -ranges_intersect(size_t off1, size_t size1, size_t off2, size_t size2) -{ - /* - * For two ranges to intersect, the start of each range must be before the - * end of the other range. - * TODO already defined in lib/pci_caps.c, maybe introduce a file for misc - * utility functions? 
- */ - return (off1 < (off2 + size2) && off2 < (off1 + size1)); -} - -static bool -maps_over_migr_regs(struct iovec *iov) -{ - return ranges_intersect(0, vfu_get_migr_register_area_size(), - (size_t)iov->iov_base, iov->iov_len); -} - -static bool -validate_sparse_mmaps_for_migr_reg(vfu_reg_info_t *reg) -{ - int i; - - for (i = 0; i < reg->nr_mmap_areas; i++) { - if (maps_over_migr_regs(®->mmap_areas[i])) { - return false; - } - } - return true; -} - EXPORT int vfu_setup_region(vfu_ctx_t *vfu_ctx, int region_idx, size_t size, vfu_region_access_cb_t *cb, int flags, @@ -1919,12 +2056,6 @@ vfu_setup_region(vfu_ctx_t *vfu_ctx, int region_idx, size_t size, return ERROR_INT(EINVAL); } - if (region_idx == VFU_PCI_DEV_MIGR_REGION_IDX && - size < vfu_get_migr_register_area_size()) { - vfu_log(vfu_ctx, LOG_ERR, "invalid migration region size %zu", size); - return ERROR_INT(EINVAL); - } - for (i = 0; i < nr_mmap_areas; i++) { struct iovec *iov = &mmap_areas[i]; if ((size_t)iov_end(iov) > size) { @@ -1956,15 +2087,6 @@ vfu_setup_region(vfu_ctx_t *vfu_ctx, int region_idx, size_t size, } } - if (region_idx == VFU_PCI_DEV_MIGR_REGION_IDX) { - if (!validate_sparse_mmaps_for_migr_reg(reg)) { - vfu_log(vfu_ctx, LOG_ERR, - "migration registers cannot be memory mapped"); - errno = EINVAL; - goto err; - } - } - return 0; err: @@ -2044,26 +2166,20 @@ vfu_setup_irq_state_callback(vfu_ctx_t *vfu_ctx, enum vfu_dev_irq_type type, EXPORT int vfu_setup_device_migration_callbacks(vfu_ctx_t *vfu_ctx, - const vfu_migration_callbacks_t *callbacks, - uint64_t data_offset) + const vfu_migration_callbacks_t *callbacks) { int ret = 0; assert(vfu_ctx != NULL); assert(callbacks != NULL); - if (vfu_ctx->reg_info[VFU_PCI_DEV_MIGR_REGION_IDX].size == 0) { - vfu_log(vfu_ctx, LOG_ERR, "no device migration region"); - return ERROR_INT(EINVAL); - } - if (callbacks->version != VFU_MIGR_CALLBACKS_VERS) { vfu_log(vfu_ctx, LOG_ERR, "unsupported migration callbacks version %d", callbacks->version); return ERROR_INT(EINVAL); } - vfu_ctx->migration = init_migration(callbacks, data_offset, &ret); + vfu_ctx->migration = init_migration(callbacks, &ret); if (vfu_ctx->migration == NULL) { vfu_log(vfu_ctx, LOG_ERR, "failed to initialize device migration"); return ERROR_INT(ret); diff --git a/lib/migration.c b/lib/migration.c index 794e7b8..02c29c1 100644 --- a/lib/migration.c +++ b/lib/migration.c @@ -39,17 +39,100 @@ #include "private.h" #include "migration_priv.h" +/* + * This defines valid migration state transitions. Each element in the array + * corresponds to a FROM state and each bit of the element to a TO state. If the + * bit is set, then the transition is allowed. + * + * The indices of each state are those in the vfio_user_device_mig_state enum. 
+ */ +static const char transitions[VFIO_USER_DEVICE_NUM_STATES] = { + [VFIO_USER_DEVICE_STATE_ERROR] = 0, + [VFIO_USER_DEVICE_STATE_STOP] = (1 << VFIO_USER_DEVICE_STATE_RUNNING) | + (1 << VFIO_USER_DEVICE_STATE_STOP_COPY) | + (1 << VFIO_USER_DEVICE_STATE_RESUMING), + [VFIO_USER_DEVICE_STATE_RUNNING] = (1 << VFIO_USER_DEVICE_STATE_STOP) | + (1 << VFIO_USER_DEVICE_STATE_PRE_COPY), + [VFIO_USER_DEVICE_STATE_STOP_COPY] = 1 << VFIO_USER_DEVICE_STATE_STOP, + [VFIO_USER_DEVICE_STATE_RESUMING] = 1 << VFIO_USER_DEVICE_STATE_STOP, + [VFIO_USER_DEVICE_STATE_RUNNING_P2P] = 0, + [VFIO_USER_DEVICE_STATE_PRE_COPY] = (1 << VFIO_USER_DEVICE_STATE_RUNNING) | + (1 << VFIO_USER_DEVICE_STATE_STOP_COPY), + [VFIO_USER_DEVICE_STATE_PRE_COPY_P2P] = 0 +}; + +/* + * The spec dictates that, if no direct transition is allowed, and the + * transition is not one of the explicitly disallowed ones (i.e. anything to + * ERROR, anything from ERROR, and STOP_COPY -> PRE_COPY), we should take the + * shortest allowed path. + * + * This can be indexed as `next_state[current][target] == next`. If next is + * ERROR, then the transition is not allowed. + */ +static const uint32_t +next_state[VFIO_USER_DEVICE_NUM_STATES][VFIO_USER_DEVICE_NUM_STATES] = { + [VFIO_USER_DEVICE_STATE_ERROR] = { 0, 0, 0, 0, 0, 0, 0, 0 }, + [VFIO_USER_DEVICE_STATE_STOP] = { + [VFIO_USER_DEVICE_STATE_ERROR] = VFIO_USER_DEVICE_STATE_ERROR, + [VFIO_USER_DEVICE_STATE_STOP] = VFIO_USER_DEVICE_STATE_STOP, + [VFIO_USER_DEVICE_STATE_RUNNING] = VFIO_USER_DEVICE_STATE_RUNNING, + [VFIO_USER_DEVICE_STATE_STOP_COPY] = VFIO_USER_DEVICE_STATE_STOP_COPY, + [VFIO_USER_DEVICE_STATE_RESUMING] = VFIO_USER_DEVICE_STATE_RESUMING, + [VFIO_USER_DEVICE_STATE_RUNNING_P2P] = VFIO_USER_DEVICE_STATE_ERROR, + [VFIO_USER_DEVICE_STATE_PRE_COPY] = VFIO_USER_DEVICE_STATE_RUNNING, + [VFIO_USER_DEVICE_STATE_PRE_COPY_P2P] = VFIO_USER_DEVICE_STATE_ERROR, + }, + [VFIO_USER_DEVICE_STATE_RUNNING] = { + [VFIO_USER_DEVICE_STATE_ERROR] = VFIO_USER_DEVICE_STATE_ERROR, + [VFIO_USER_DEVICE_STATE_STOP] = VFIO_USER_DEVICE_STATE_STOP, + [VFIO_USER_DEVICE_STATE_RUNNING] = VFIO_USER_DEVICE_STATE_RUNNING, + [VFIO_USER_DEVICE_STATE_STOP_COPY] = VFIO_USER_DEVICE_STATE_STOP, + [VFIO_USER_DEVICE_STATE_RESUMING] = VFIO_USER_DEVICE_STATE_STOP, + [VFIO_USER_DEVICE_STATE_RUNNING_P2P] = VFIO_USER_DEVICE_STATE_ERROR, + [VFIO_USER_DEVICE_STATE_PRE_COPY] = VFIO_USER_DEVICE_STATE_PRE_COPY, + [VFIO_USER_DEVICE_STATE_PRE_COPY_P2P] = VFIO_USER_DEVICE_STATE_ERROR, + }, + [VFIO_USER_DEVICE_STATE_STOP_COPY] = { + [VFIO_USER_DEVICE_STATE_ERROR] = VFIO_USER_DEVICE_STATE_ERROR, + [VFIO_USER_DEVICE_STATE_STOP] = VFIO_USER_DEVICE_STATE_STOP, + [VFIO_USER_DEVICE_STATE_RUNNING] = VFIO_USER_DEVICE_STATE_STOP, + [VFIO_USER_DEVICE_STATE_STOP_COPY] = VFIO_USER_DEVICE_STATE_STOP_COPY, + [VFIO_USER_DEVICE_STATE_RESUMING] = VFIO_USER_DEVICE_STATE_STOP, + [VFIO_USER_DEVICE_STATE_RUNNING_P2P] = VFIO_USER_DEVICE_STATE_ERROR, + [VFIO_USER_DEVICE_STATE_PRE_COPY] = VFIO_USER_DEVICE_STATE_ERROR, + [VFIO_USER_DEVICE_STATE_PRE_COPY_P2P] = VFIO_USER_DEVICE_STATE_ERROR, + }, + [VFIO_USER_DEVICE_STATE_RESUMING] = { + [VFIO_USER_DEVICE_STATE_ERROR] = VFIO_USER_DEVICE_STATE_ERROR, + [VFIO_USER_DEVICE_STATE_STOP] = VFIO_USER_DEVICE_STATE_STOP, + [VFIO_USER_DEVICE_STATE_RUNNING] = VFIO_USER_DEVICE_STATE_STOP, + [VFIO_USER_DEVICE_STATE_STOP_COPY] = VFIO_USER_DEVICE_STATE_STOP, + [VFIO_USER_DEVICE_STATE_RESUMING] = VFIO_USER_DEVICE_STATE_RESUMING, + [VFIO_USER_DEVICE_STATE_RUNNING_P2P] = VFIO_USER_DEVICE_STATE_ERROR, + [VFIO_USER_DEVICE_STATE_PRE_COPY] = 
VFIO_USER_DEVICE_STATE_STOP, + [VFIO_USER_DEVICE_STATE_PRE_COPY_P2P] = VFIO_USER_DEVICE_STATE_ERROR, + }, + [VFIO_USER_DEVICE_STATE_RUNNING_P2P] = { 0, 0, 0, 0, 0, 0, 0, 0 }, + [VFIO_USER_DEVICE_STATE_PRE_COPY] = { + [VFIO_USER_DEVICE_STATE_ERROR] = VFIO_USER_DEVICE_STATE_ERROR, + [VFIO_USER_DEVICE_STATE_STOP] = VFIO_USER_DEVICE_STATE_RUNNING, + [VFIO_USER_DEVICE_STATE_RUNNING] = VFIO_USER_DEVICE_STATE_RUNNING, + [VFIO_USER_DEVICE_STATE_STOP_COPY] = VFIO_USER_DEVICE_STATE_STOP_COPY, + [VFIO_USER_DEVICE_STATE_RESUMING] = VFIO_USER_DEVICE_STATE_RUNNING, + [VFIO_USER_DEVICE_STATE_RUNNING_P2P] = VFIO_USER_DEVICE_STATE_ERROR, + [VFIO_USER_DEVICE_STATE_PRE_COPY] = VFIO_USER_DEVICE_STATE_PRE_COPY, + [VFIO_USER_DEVICE_STATE_PRE_COPY_P2P] = VFIO_USER_DEVICE_STATE_ERROR, + }, + [VFIO_USER_DEVICE_STATE_PRE_COPY_P2P] = { 0, 0, 0, 0, 0, 0, 0, 0 }, +}; + bool MOCK_DEFINE(vfio_migr_state_transition_is_valid)(uint32_t from, uint32_t to) { - return migr_states[from].state & (1 << to); -} - -EXPORT size_t -vfu_get_migr_register_area_size(void) -{ - return ROUND_UP(sizeof(struct vfio_user_migration_info), - sysconf(_SC_PAGE_SIZE)); + return from < VFIO_USER_DEVICE_NUM_STATES + && to < VFIO_USER_DEVICE_NUM_STATES + && (transitions[from] & (1 << to)) != 0; } /* @@ -57,16 +140,10 @@ vfu_get_migr_register_area_size(void) * in vfu_ctx_t. */ struct migration * -init_migration(const vfu_migration_callbacks_t * callbacks, - uint64_t data_offset, int *err) +init_migration(const vfu_migration_callbacks_t *callbacks, int *err) { struct migration *migr; - if (data_offset < vfu_get_migr_register_area_size()) { - *err = EINVAL; - return NULL; - } - migr = calloc(1, sizeof(*migr)); if (migr == NULL) { *err = ENOMEM; @@ -81,15 +158,13 @@ init_migration(const vfu_migration_callbacks_t * callbacks, migr->pgsize = sysconf(_SC_PAGESIZE); /* FIXME this should be done in vfu_ctx_realize */ - migr->info.device_state = VFIO_DEVICE_STATE_V1_RUNNING; - migr->data_offset = data_offset; + migr->state = VFIO_USER_DEVICE_STATE_RUNNING; migr->callbacks = *callbacks; if (migr->callbacks.transition == NULL || - migr->callbacks.get_pending_bytes == NULL || - migr->callbacks.prepare_data == NULL || migr->callbacks.read_data == NULL || - migr->callbacks.write_data == NULL) { + migr->callbacks.write_data == NULL || + migr->callbacks.version != VFU_MIGR_CALLBACKS_VERS) { free(migr); *err = EINVAL; return NULL; @@ -100,35 +175,29 @@ init_migration(const vfu_migration_callbacks_t * callbacks, void MOCK_DEFINE(migr_state_transition)(struct migration *migr, - enum migr_iter_state state) + enum vfio_user_device_mig_state state) { assert(migr != NULL); - /* FIXME validate that state transition */ - migr->iter.state = state; + migr->state = state; } vfu_migr_state_t -MOCK_DEFINE(migr_state_vfio_to_vfu)(uint32_t device_state) +MOCK_DEFINE(migr_state_vfio_to_vfu)(uint32_t state) { - switch (device_state) { - case VFIO_DEVICE_STATE_V1_STOP: + switch (state) { + case VFIO_USER_DEVICE_STATE_STOP: return VFU_MIGR_STATE_STOP; - case VFIO_DEVICE_STATE_V1_RUNNING: + case VFIO_USER_DEVICE_STATE_RUNNING: return VFU_MIGR_STATE_RUNNING; - case VFIO_DEVICE_STATE_V1_SAVING: - /* - * FIXME How should the device operate during the stop-and-copy - * phase? Should we only allow the migration data to be read from - * the migration region? E.g. Access to any other region should be - * failed? This might be a good question to send to LKML. 
- */ + case VFIO_USER_DEVICE_STATE_STOP_COPY: return VFU_MIGR_STATE_STOP_AND_COPY; - case VFIO_DEVICE_STATE_V1_RUNNING | VFIO_DEVICE_STATE_V1_SAVING: - return VFU_MIGR_STATE_PRE_COPY; - case VFIO_DEVICE_STATE_V1_RESUMING: + case VFIO_USER_DEVICE_STATE_RESUMING: return VFU_MIGR_STATE_RESUME; + case VFIO_USER_DEVICE_STATE_PRE_COPY: + return VFU_MIGR_STATE_PRE_COPY; + default: + return -1; } - return -1; } /** @@ -165,8 +234,7 @@ MOCK_DEFINE(migr_trans_to_valid_state)(vfu_ctx_t *vfu_ctx, struct migration *mig return ret; } } - migr->info.device_state = device_state; - migr_state_transition(migr, VFIO_USER_MIGR_ITER_STATE_INITIAL); + migr_state_transition(migr, device_state); return 0; } @@ -178,372 +246,176 @@ MOCK_DEFINE(handle_device_state)(vfu_ctx_t *vfu_ctx, struct migration *migr, uint32_t device_state, bool notify) { + assert(vfu_ctx != NULL); assert(migr != NULL); - if (!vfio_migr_state_transition_is_valid(migr->info.device_state, - device_state)) { + if (!vfio_migr_state_transition_is_valid(migr->state, device_state)) { return ERROR_INT(EINVAL); } return migr_trans_to_valid_state(vfu_ctx, migr, device_state, notify); } -/** - * Returns 0 on success, -1 on error setting errno. - */ -static ssize_t -handle_pending_bytes(vfu_ctx_t *vfu_ctx, struct migration *migr, - uint64_t *pending_bytes, bool is_write) +size_t +migration_get_state(vfu_ctx_t *vfu_ctx) { - assert(migr != NULL); - assert(pending_bytes != NULL); + return vfu_ctx->migration->state; +} - if (is_write) { +ssize_t +migration_set_state(vfu_ctx_t *vfu_ctx, uint32_t device_state) +{ + struct migration *migr = vfu_ctx->migration; + uint32_t state; + ssize_t ret = 0; + + if (device_state > VFIO_USER_DEVICE_NUM_STATES) { return ERROR_INT(EINVAL); } + + while (migr->state != device_state && ret == 0) { + state = next_state[migr->state][device_state]; - if (migr->iter.state == VFIO_USER_MIGR_ITER_STATE_FINISHED) { - *pending_bytes = 0; - return 0; - } - - switch (migr->iter.state) { - case VFIO_USER_MIGR_ITER_STATE_INITIAL: - case VFIO_USER_MIGR_ITER_STATE_DATA_PREPARED: - /* - * FIXME what happens if data haven't been consumed in the previous - * iteration? Check https://www.spinics.net/lists/kvm/msg228608.html. - */ - *pending_bytes = migr->iter.pending_bytes = migr->callbacks.get_pending_bytes(vfu_ctx); - - if (*pending_bytes == 0) { - migr_state_transition(migr, VFIO_USER_MIGR_ITER_STATE_FINISHED); - } else { - migr_state_transition(migr, VFIO_USER_MIGR_ITER_STATE_STARTED); - } - break; - case VFIO_USER_MIGR_ITER_STATE_STARTED: - /* - * FIXME We might be wrong returning a cached value, check - * https://www.spinics.net/lists/kvm/msg228608.html - * - */ - *pending_bytes = migr->iter.pending_bytes; - break; - default: + if (state == VFIO_USER_DEVICE_STATE_ERROR) { return ERROR_INT(EINVAL); - } - return 0; -} + } -/* - * FIXME reading or writing migration registers with the wrong device state or - * out of sequence is undefined, but should not result in EINVAL, it should - * simply be ignored. However this way it's easier to catch development errors. - * Make this behavior conditional. - */ + ret = handle_device_state(vfu_ctx, migr, state, true); + }; + + return ret; +} -/** - * Returns 0 on success, -1 on error setting errno. 
- */ -static ssize_t -handle_data_offset_when_saving(vfu_ctx_t *vfu_ctx, struct migration *migr, - bool is_write) +ssize_t +handle_mig_data_read(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg) { - int ret = 0; - - assert(migr != NULL); + assert(vfu_ctx != NULL); + assert(msg != NULL); - if (is_write) { - vfu_log(vfu_ctx, LOG_ERR, "data_offset is RO when saving"); + if (msg->in.iov.iov_len < sizeof(struct vfio_user_mig_data)) { + vfu_log(vfu_ctx, LOG_ERR, "message too short (%ld)", + msg->in.iov.iov_len); return ERROR_INT(EINVAL); } - switch (migr->iter.state) { - case VFIO_USER_MIGR_ITER_STATE_STARTED: - ret = migr->callbacks.prepare_data(vfu_ctx, &migr->iter.offset, - &migr->iter.size); - if (ret != 0) { - return ret; - } - /* - * FIXME must first read data_offset and then data_size. They way we've - * implemented it now, if data_size is read before data_offset we - * transition to state VFIO_USER_MIGR_ITER_STATE_DATA_PREPARED without - * calling callbacks.prepare_data, which is wrong. Maybe we need - * separate states for data_offset and data_size. - */ - migr_state_transition(migr, VFIO_USER_MIGR_ITER_STATE_DATA_PREPARED); - break; - case VFIO_USER_MIGR_ITER_STATE_DATA_PREPARED: - /* - * data_offset is invariant during a save iteration. - */ - break; - default: - vfu_log(vfu_ctx, LOG_ERR, - "reading data_offset out of sequence is undefined"); + struct migration *migr = vfu_ctx->migration; + struct vfio_user_mig_data *req = msg->in.iov.iov_base; + + if (vfu_ctx->migration == NULL) { + vfu_log(vfu_ctx, LOG_ERR, "migration not enabled"); return ERROR_INT(EINVAL); } - return 0; -} - -/** - * Returns 0 on success, -1 on error setting errno. - */ -static ssize_t -handle_data_offset(vfu_ctx_t *vfu_ctx, struct migration *migr, - uint64_t *offset, bool is_write) -{ - int ret; - - assert(migr != NULL); - assert(offset != NULL); - - switch (migr->info.device_state) { - case VFIO_DEVICE_STATE_V1_SAVING: - case VFIO_DEVICE_STATE_V1_RUNNING | VFIO_DEVICE_STATE_V1_SAVING: - ret = handle_data_offset_when_saving(vfu_ctx, migr, is_write); - if (ret == 0 && !is_write) { - *offset = migr->iter.offset + migr->data_offset; - } - return ret; - case VFIO_DEVICE_STATE_V1_RESUMING: - if (is_write) { - /* TODO writing to read-only registers should be simply ignored */ - vfu_log(vfu_ctx, LOG_ERR, "bad write to migration data_offset"); - return ERROR_INT(EINVAL); - } - ret = migr->callbacks.prepare_data(vfu_ctx, offset, NULL); - if (ret != 0) { - return ret; - } - *offset += migr->data_offset; - return 0; + if (migr->state != VFIO_USER_DEVICE_STATE_PRE_COPY + && migr->state != VFIO_USER_DEVICE_STATE_STOP_COPY) { + vfu_log(vfu_ctx, LOG_ERR, "bad migration state to read data: %d", + migr->state); + return ERROR_INT(EINVAL); } - /* TODO improve error message */ - vfu_log(vfu_ctx, LOG_ERR, - "bad access to migration data_offset in state %s", - migr_states[migr->info.device_state].name); - return ERROR_INT(EINVAL); -} - -/** - * Returns 0 on success, -1 on failure setting errno. 
- */ -static ssize_t -handle_data_size_when_saving(vfu_ctx_t *vfu_ctx, struct migration *migr, - bool is_write) -{ - assert(migr != NULL); - if (is_write) { - /* TODO improve error message */ - vfu_log(vfu_ctx, LOG_ERR, "data_size is RO when saving"); + if (req->size > vfu_ctx->client_max_data_xfer_size) { + vfu_log(vfu_ctx, LOG_ERR, "transfer size exceeds limit (%d > %ld)", + req->size, vfu_ctx->client_max_data_xfer_size); return ERROR_INT(EINVAL); } - if (migr->iter.state != VFIO_USER_MIGR_ITER_STATE_STARTED && - migr->iter.state != VFIO_USER_MIGR_ITER_STATE_DATA_PREPARED) { - vfu_log(vfu_ctx, LOG_ERR, - "reading data_size ouf of sequence is undefined"); + if (req->argsz < sizeof(struct vfio_user_mig_data) + req->size) { + vfu_log(vfu_ctx, LOG_ERR, "argsz too small (%d < %ld)", + req->argsz, sizeof(struct vfio_user_mig_data) + req->size); return ERROR_INT(EINVAL); } - return 0; -} -/** - * Returns 0 on success, -1 on error setting errno. - */ -static ssize_t -handle_data_size_when_resuming(vfu_ctx_t *vfu_ctx, struct migration *migr, - uint64_t size, bool is_write) -{ - assert(migr != NULL); + msg->out.iov.iov_len = msg->in.iov.iov_len + req->size; + msg->out.iov.iov_base = calloc(1, msg->out.iov.iov_len); - if (is_write) { - return migr->callbacks.data_written(vfu_ctx, size); + if (msg->out.iov.iov_base == NULL) { + return ERROR_INT(ENOMEM); } - return 0; -} -/** - * Returns 0 on success, -1 on failure setting errno. - */ -static ssize_t -handle_data_size(vfu_ctx_t *vfu_ctx, struct migration *migr, - uint64_t *size, bool is_write) -{ - int ret; + struct vfio_user_mig_data *res = msg->out.iov.iov_base; - assert(vfu_ctx != NULL); - assert(size != NULL); - - switch (migr->info.device_state){ - case VFIO_DEVICE_STATE_V1_SAVING: - case VFIO_DEVICE_STATE_V1_RUNNING | VFIO_DEVICE_STATE_V1_SAVING: - ret = handle_data_size_when_saving(vfu_ctx, migr, is_write); - if (ret == 0 && !is_write) { - *size = migr->iter.size; - } + ssize_t ret = migr->callbacks.read_data(vfu_ctx, &res->data, req->size); + + if (ret < 0) { + vfu_log(vfu_ctx, LOG_ERR, "read_data callback failed, errno=%d", errno); + iov_free(&msg->out.iov); return ret; - case VFIO_DEVICE_STATE_V1_RESUMING: - return handle_data_size_when_resuming(vfu_ctx, migr, *size, is_write); } - /* TODO improve error message */ - vfu_log(vfu_ctx, LOG_ERR, "bad access to data_size"); - return ERROR_INT(EINVAL); + + res->size = ret; + res->argsz = sizeof(struct vfio_user_mig_data) + ret; + + return 0; } -/** - * Returns 0 on success, -1 on failure setting errno. 
- */ ssize_t -MOCK_DEFINE(migration_region_access_registers)(vfu_ctx_t *vfu_ctx, char *buf, - size_t count, loff_t pos, - bool is_write) +handle_mig_data_write(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg) { + assert(vfu_ctx != NULL); + assert(msg != NULL); + + if (msg->in.iov.iov_len < sizeof(struct vfio_user_mig_data)) { + vfu_log(vfu_ctx, LOG_ERR, "message too short (%ld)", + msg->in.iov.iov_len); + return ERROR_INT(EINVAL); + } + struct migration *migr = vfu_ctx->migration; - int ret; - uint32_t *device_state, old_device_state; + struct vfio_user_mig_data *req = msg->in.iov.iov_base; - assert(migr != NULL); + if (vfu_ctx->migration == NULL) { + vfu_log(vfu_ctx, LOG_ERR, "migration not enabled"); + return ERROR_INT(EINVAL); + } - switch (pos) { - case offsetof(struct vfio_user_migration_info, device_state): - if (count != sizeof(migr->info.device_state)) { - vfu_log(vfu_ctx, LOG_ERR, - "bad device_state access size %zu", count); - return ERROR_INT(EINVAL); - } - device_state = (uint32_t *)buf; - if (!is_write) { - *device_state = migr->info.device_state; - return 0; - } - old_device_state = migr->info.device_state; - vfu_log(vfu_ctx, LOG_DEBUG, - "migration: transitioning from state %s to state %s", - migr_states[old_device_state].name, - migr_states[*device_state].name); - - ret = handle_device_state(vfu_ctx, migr, *device_state, true); - if (ret == 0) { - vfu_log(vfu_ctx, LOG_DEBUG, - "migration: transitioned from state %s to state %s", - migr_states[old_device_state].name, - migr_states[*device_state].name); - } else { - vfu_log(vfu_ctx, LOG_ERR, - "migration: failed to transition from state %s to state %s", - migr_states[old_device_state].name, - migr_states[*device_state].name); - } - break; - case offsetof(struct vfio_user_migration_info, pending_bytes): - if (count != sizeof(migr->info.pending_bytes)) { - vfu_log(vfu_ctx, LOG_ERR, - "bad pending_bytes access size %zu", count); - return ERROR_INT(EINVAL); - } - ret = handle_pending_bytes(vfu_ctx, migr, (uint64_t *)buf, is_write); - break; - case offsetof(struct vfio_user_migration_info, data_offset): - if (count != sizeof(migr->info.data_offset)) { - vfu_log(vfu_ctx, LOG_ERR, - "bad data_offset access size %zu", count); - return ERROR_INT(EINVAL); - } - ret = handle_data_offset(vfu_ctx, migr, (uint64_t *)buf, is_write); - break; - case offsetof(struct vfio_user_migration_info, data_size): - if (count != sizeof(migr->info.data_size)) { - vfu_log(vfu_ctx, LOG_ERR, - "bad data_size access size %zu", count); - return ERROR_INT(EINVAL); - } - ret = handle_data_size(vfu_ctx, migr, (uint64_t *)buf, is_write); - break; - default: - vfu_log(vfu_ctx, LOG_ERR, - "bad migration region register offset %#llx", - (ull_t)pos); + if (migr->state != VFIO_USER_DEVICE_STATE_RESUMING) { + vfu_log(vfu_ctx, LOG_ERR, "bad migration state to write data: %d", + migr->state); return ERROR_INT(EINVAL); } - return ret; -} -ssize_t -migration_region_access(vfu_ctx_t *vfu_ctx, char *buf, size_t count, - loff_t pos, bool is_write) -{ - struct migration *migr = vfu_ctx->migration; - ssize_t ret; + if (req->size > vfu_ctx->client_max_data_xfer_size) { + vfu_log(vfu_ctx, LOG_ERR, "transfer size exceeds limit (%d > %ld)", + req->size, vfu_ctx->client_max_data_xfer_size); + return ERROR_INT(EINVAL); + } - assert(migr != NULL); - assert(buf != NULL); + if (req->argsz < sizeof(struct vfio_user_mig_data) + req->size) { + vfu_log(vfu_ctx, LOG_ERR, "argsz too small (%d < %ld)", + req->argsz, sizeof(struct vfio_user_mig_data) + req->size); + return ERROR_INT(EINVAL); + } - /* - * 
FIXME don't call the device callback if the migration state is in not in - * pre-copy/stop-and-copy/resuming state, since the behavior is undefined - * in that case. - */ + if (msg->in.iov.iov_len < sizeof(struct vfio_user_mig_data) + req->size) { + vfu_log(vfu_ctx, LOG_ERR, "short write (%d < %ld)", + req->argsz, sizeof(struct vfio_user_mig_data) + req->size); + return ERROR_INT(EINVAL); + } - if (pos + count <= sizeof(struct vfio_user_migration_info)) { - ret = migration_region_access_registers(vfu_ctx, buf, count, - pos, is_write); - if (ret != 0) { - return ret; - } - } else { - - if (pos < (loff_t)migr->data_offset) { - /* - * TODO we can simply ignore the access to that part and handle - * any access to the data region properly. - */ - vfu_log(vfu_ctx, LOG_WARNING, - "bad access to dead space %#llx - %#llx in migration region", - (ull_t)pos, - (ull_t)(pos + count - 1)); - return ERROR_INT(EINVAL); - } + ssize_t ret = migr->callbacks.write_data(vfu_ctx, &req->data, req->size); - pos -= migr->data_offset; - if (is_write) { - ret = migr->callbacks.write_data(vfu_ctx, buf, count, pos); - if (ret < 0) { - return -1; - } - } else { - /* - * FIXME says: - * - * d. Read data_size bytes of data from (region + data_offset) from the - * migration region. - * - * Does this mean that partial reads are not allowed? - */ - ret = migr->callbacks.read_data(vfu_ctx, buf, count, pos); - if (ret < 0) { - return -1; - } - } + if (ret < 0) { + vfu_log(vfu_ctx, LOG_ERR, "write_data callback failed, errno=%d", + errno); + return ret; + } else if (ret != req->size) { + vfu_log(vfu_ctx, LOG_ERR, "migration data partial write of size=%ld", + ret); + return ERROR_INT(EINVAL); } - return count; + return 0; } bool MOCK_DEFINE(device_is_stopped_and_copying)(struct migration *migr) { - return migr != NULL && migr->info.device_state == VFIO_DEVICE_STATE_V1_SAVING; + return migr != NULL && migr->state == VFIO_USER_DEVICE_STATE_STOP_COPY; } bool MOCK_DEFINE(device_is_stopped)(struct migration *migr) { - return migr != NULL && migr->info.device_state == VFIO_DEVICE_STATE_V1_STOP; + return migr != NULL && migr->state == VFIO_USER_DEVICE_STATE_STOP; } size_t @@ -569,17 +441,11 @@ migration_set_pgsize(struct migration *migr, size_t pgsize) } bool -access_migration_needs_quiesce(const vfu_ctx_t *vfu_ctx, size_t region_index, - uint64_t offset) +migration_feature_needs_quiesce(struct vfio_user_device_feature *feature) { - /* - * Writing to the migration state register with an unaligned access won't - * trigger this check but that's not a problem because - * migration_region_access_registers will fail the access. 
- */ - return region_index == VFU_PCI_DEV_MIGR_REGION_IDX - && vfu_ctx->migration != NULL - && offset == offsetof(struct vfio_user_migration_info, device_state); + return ((feature->flags & + (VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE)) != 0) + && !(feature->flags & VFIO_DEVICE_FEATURE_PROBE); } /* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lib/migration.h b/lib/migration.h index 26fd744..928a7e5 100644 --- a/lib/migration.h +++ b/lib/migration.h @@ -45,12 +45,19 @@ #include "private.h" struct migration * -init_migration(const vfu_migration_callbacks_t *callbacks, - uint64_t data_offset, int *err); +init_migration(const vfu_migration_callbacks_t *callbacks, int *err); + +size_t +migration_get_state(vfu_ctx_t *vfu_ctx); + +ssize_t +migration_set_state(vfu_ctx_t *vfu_ctx, uint32_t device_state); ssize_t -migration_region_access(vfu_ctx_t *vfu_ctx, char *buf, size_t count, - loff_t pos, bool is_write); +handle_mig_data_read(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg); + +ssize_t +handle_mig_data_write(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg); bool migration_available(vfu_ctx_t *vfu_ctx); @@ -65,6 +72,12 @@ migration_get_pgsize(struct migration *migr); int migration_set_pgsize(struct migration *migr, size_t pgsize); +uint64_t +migration_get_flags(struct migration *migr); + +MOCK_DECLARE(void, migr_state_transition, struct migration *migr, + enum vfio_user_device_mig_state state); + MOCK_DECLARE(bool, vfio_migr_state_transition_is_valid, uint32_t from, uint32_t to); @@ -72,8 +85,7 @@ MOCK_DECLARE(ssize_t, handle_device_state, vfu_ctx_t *vfu_ctx, struct migration *migr, uint32_t device_state, bool notify); bool -access_migration_needs_quiesce(const vfu_ctx_t *vfu_ctx, size_t region_index, - uint64_t offset); +migration_feature_needs_quiesce(struct vfio_user_device_feature *feature); #endif /* LIB_VFIO_USER_MIGRATION_H */ diff --git a/lib/migration_priv.h b/lib/migration_priv.h index d5643af..83c5f7e 100644 --- a/lib/migration_priv.h +++ b/lib/migration_priv.h @@ -33,94 +33,12 @@ #include -/* - * FSM to simplify saving device state. - */ -enum migr_iter_state { - VFIO_USER_MIGR_ITER_STATE_INITIAL, - VFIO_USER_MIGR_ITER_STATE_STARTED, - VFIO_USER_MIGR_ITER_STATE_DATA_PREPARED, - VFIO_USER_MIGR_ITER_STATE_FINISHED -}; - struct migration { - /* - * TODO if the user provides an FD then should mmap it and use the migration - * registers in the file - */ - struct vfio_user_migration_info info; + enum vfio_user_device_mig_state state; size_t pgsize; vfu_migration_callbacks_t callbacks; - uint64_t data_offset; - - /* - * This is only for the saving state. The resuming state is simpler so we - * don't need it. 
- */ - struct { - enum migr_iter_state state; - uint64_t pending_bytes; - uint64_t offset; - uint64_t size; - } iter; -}; - -struct migr_state_data { - uint32_t state; - const char *name; -}; - -#define VFIO_DEVICE_STATE_V1_ERROR (VFIO_DEVICE_STATE_V1_SAVING | VFIO_DEVICE_STATE_V1_RESUMING) - -/* valid migration state transitions */ -static const struct migr_state_data migr_states[(VFIO_DEVICE_STATE_MASK + 1)] = { - [VFIO_DEVICE_STATE_V1_STOP] = { - .state = - (1 << VFIO_DEVICE_STATE_V1_STOP) | - (1 << VFIO_DEVICE_STATE_V1_RUNNING), - .name = "stopped" - }, - [VFIO_DEVICE_STATE_V1_RUNNING] = { - .state = - (1 << VFIO_DEVICE_STATE_V1_STOP) | - (1 << VFIO_DEVICE_STATE_V1_RUNNING) | - (1 << VFIO_DEVICE_STATE_V1_SAVING) | - (1 << (VFIO_DEVICE_STATE_V1_RUNNING | VFIO_DEVICE_STATE_V1_SAVING)) | - (1 << VFIO_DEVICE_STATE_V1_RESUMING) | - (1 << VFIO_DEVICE_STATE_V1_ERROR), - .name = "running" - }, - [VFIO_DEVICE_STATE_V1_SAVING] = { - .state = - (1 << VFIO_DEVICE_STATE_V1_STOP) | - (1 << VFIO_DEVICE_STATE_V1_RUNNING) | - (1 << VFIO_DEVICE_STATE_V1_SAVING) | - (1 << VFIO_DEVICE_STATE_V1_ERROR), - .name = "stop-and-copy" - }, - [VFIO_DEVICE_STATE_V1_RUNNING | VFIO_DEVICE_STATE_V1_SAVING] = { - .state = - (1 << VFIO_DEVICE_STATE_V1_STOP) | - (1 << VFIO_DEVICE_STATE_V1_SAVING) | - (1 << VFIO_DEVICE_STATE_V1_RUNNING | VFIO_DEVICE_STATE_V1_SAVING) | - (1 << VFIO_DEVICE_STATE_V1_ERROR), - .name = "pre-copy" - }, - [VFIO_DEVICE_STATE_V1_RESUMING] = { - .state = - (1 << VFIO_DEVICE_STATE_V1_RUNNING) | - (1 << VFIO_DEVICE_STATE_V1_RESUMING) | - (1 << VFIO_DEVICE_STATE_V1_ERROR), - .name = "resuming" - } }; -MOCK_DECLARE(ssize_t, migration_region_access_registers, vfu_ctx_t *vfu_ctx, - char *buf, size_t count, loff_t pos, bool is_write); - -MOCK_DECLARE(void, migr_state_transition, struct migration *migr, - enum migr_iter_state state); - MOCK_DECLARE(vfu_migr_state_t, migr_state_vfio_to_vfu, uint32_t device_state); MOCK_DECLARE(int, state_trans_notify, vfu_ctx_t *vfu_ctx, @@ -129,4 +47,4 @@ MOCK_DECLARE(int, state_trans_notify, vfu_ctx_t *vfu_ctx, #endif -/* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ +/* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ \ No newline at end of file diff --git a/lib/private.h b/lib/private.h index fdd804f..6e0170e 100644 --- a/lib/private.h +++ b/lib/private.h @@ -195,20 +195,6 @@ typedef struct ioeventfd { LIST_ENTRY(ioeventfd) entry; } ioeventfd_t; -static inline int -ERROR_INT(int err) -{ - errno = err; - return -1; -} - -static inline void * -ERROR_PTR(int err) -{ - errno = err; - return NULL; -} - int consume_fd(int *fds, size_t nr_fds, size_t index); -- cgit v1.1
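The C sketch below is an illustrative appendix, not part of the commit above. It restates, in standalone form, two mechanisms the diff introduces: the "shortest allowed path" walk that migration_set_state() performs over its next_state table, and the OR-combining that dirty_page_get_combine() applies when the client requests a coarser dirty-page granularity than the server logs at. Every name in the sketch (walk_to_state, combine_bitmap, the reduced three-state table) is hypothetical and simplified for illustration; none of it is the library's API.

#include <limits.h>    /* CHAR_BIT */
#include <stddef.h>
#include <stdint.h>

/*
 * Reduced model of the v2 state machine with only ERROR, STOP, RUNNING and
 * STOP_COPY. next_hop[cur][target] gives the next state on the shortest
 * allowed path, or ST_ERROR if no path exists (entries left out default to
 * 0, i.e. ST_ERROR).
 */
enum mig_state { ST_ERROR, ST_STOP, ST_RUNNING, ST_STOP_COPY, ST_NUM };

static const enum mig_state next_hop[ST_NUM][ST_NUM] = {
    [ST_STOP]      = { ST_ERROR, ST_STOP, ST_RUNNING, ST_STOP_COPY },
    [ST_RUNNING]   = { ST_ERROR, ST_STOP, ST_RUNNING, ST_STOP      },
    [ST_STOP_COPY] = { ST_ERROR, ST_STOP, ST_STOP,    ST_STOP_COPY },
};

/*
 * Walk from *cur to target one hop at a time, in the spirit of
 * migration_set_state(), so that every intermediate state is visited (and
 * could be reported to device callbacks). Returns 0 on success, -1 if no
 * valid path exists.
 */
static int
walk_to_state(enum mig_state *cur, enum mig_state target)
{
    while (*cur != target) {
        enum mig_state hop = next_hop[*cur][target];

        if (hop == ST_ERROR) {
            return -1;
        }
        *cur = hop;
    }
    return 0;
}

/*
 * Fold a dirty bitmap tracked at the server's page size into a coarser
 * client bitmap, where factor = client_pgsize / server_pgsize (a power of
 * two). A client bit is set if any of the factor server bits covering the
 * same IOVA range is set, which is the effect of dirty_page_get_combine()
 * above, minus the atomic exchange against the live per-region bitmap.
 */
static void
combine_bitmap(const uint8_t *server, size_t server_nbits,
               uint8_t *client, size_t client_nbytes, size_t factor)
{
    for (size_t s = 0; s < server_nbits; s++) {
        size_t c = s / factor;                 /* destination client bit */

        if (c / CHAR_BIT >= client_nbytes) {
            break;                             /* client bitmap exhausted */
        }
        if (server[s / CHAR_BIT] & (1u << (s % CHAR_BIT))) {
            client[c / CHAR_BIT] |= (uint8_t)(1u << (c % CHAR_BIT));
        }
    }
}

The reverse case, where the client asks for a finer page size than the server tracks, corresponds to dirty_page_get_extend() in the diff: each server bit is repeated factor = server_pgsize / client_pgsize times instead of being OR-folded.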