diff options
author | William Henderson <william.henderson@nutanix.com> | 2023-07-10 10:46:29 +0000 |
---|---|---|
committer | John Levon <john.levon@nutanix.com> | 2023-09-15 12:59:39 +0100 |
commit | 0e5c6655dbd464b4ad8ff1b83f884e9cc2fab8e2 (patch) | |
tree | 7f830dbfa0dcf40a7098d18ca86a16efcd6c0c91 /lib | |
parent | ceca7e55c60a337faa91ed418452b8ba7f2a9b57 (diff) | |
download | libvfio-user-0e5c6655dbd464b4ad8ff1b83f884e9cc2fab8e2.zip libvfio-user-0e5c6655dbd464b4ad8ff1b83f884e9cc2fab8e2.tar.gz libvfio-user-0e5c6655dbd464b4ad8ff1b83f884e9cc2fab8e2.tar.bz2 |
implement migration v2 in libvfio-user
Signed-off-by: William Henderson <william.henderson@nutanix.com>
Diffstat (limited to 'lib')
-rw-r--r-- | lib/libvfio-user.c | 167 | ||||
-rw-r--r-- | lib/migration.c | 455 | ||||
-rw-r--r-- | lib/migration.h | 23 | ||||
-rw-r--r-- | lib/migration_priv.h | 96 |
4 files changed, 197 insertions, 544 deletions
diff --git a/lib/libvfio-user.c b/lib/libvfio-user.c index 663d2cd..7920494 100644 --- a/lib/libvfio-user.c +++ b/lib/libvfio-user.c @@ -83,21 +83,16 @@ vfu_log(vfu_ctx_t *vfu_ctx, int level, const char *fmt, ...) } static size_t -get_vfio_caps_size(bool is_migr_reg, vfu_reg_info_t *reg) +get_vfio_caps_size(vfu_reg_info_t *reg) { - size_t type_size = 0; size_t sparse_size = 0; - if (is_migr_reg) { - type_size = sizeof(struct vfio_region_info_cap_type); - } - if (reg->nr_mmap_areas != 0) { sparse_size = sizeof(struct vfio_region_info_cap_sparse_mmap) + (reg->nr_mmap_areas * sizeof(struct vfio_region_sparse_mmap_area)); } - return type_size + sparse_size; + return sparse_size; } /* @@ -106,7 +101,7 @@ get_vfio_caps_size(bool is_migr_reg, vfu_reg_info_t *reg) * points accordingly. */ static int -dev_get_caps(vfu_ctx_t *vfu_ctx, vfu_reg_info_t *vfu_reg, bool is_migr_reg, +dev_get_caps(vfu_ctx_t *vfu_ctx, vfu_reg_info_t *vfu_reg, struct vfio_region_info *vfio_reg, int **fds, size_t *nr_fds) { struct vfio_info_cap_header *header; @@ -120,16 +115,6 @@ dev_get_caps(vfu_ctx_t *vfu_ctx, vfu_reg_info_t *vfu_reg, bool is_migr_reg, header = (struct vfio_info_cap_header*)(vfio_reg + 1); - if (is_migr_reg) { - type = (struct vfio_region_info_cap_type *)header; - type->header.id = VFIO_REGION_INFO_CAP_TYPE; - type->header.version = 1; - type->header.next = 0; - type->type = VFIO_REGION_TYPE_MIGRATION; - type->subtype = VFIO_REGION_SUBTYPE_MIGRATION; - vfio_reg->cap_offset = sizeof(struct vfio_region_info); - } - if (vfu_reg->mmap_areas != NULL) { int i, nr_mmap_areas = vfu_reg->nr_mmap_areas; if (type != NULL) { @@ -218,14 +203,6 @@ region_access(vfu_ctx_t *vfu_ctx, size_t region, char *buf, if (ret == -1) { goto out; } - } else if (region == VFU_PCI_DEV_MIGR_REGION_IDX) { - if (vfu_ctx->migration == NULL) { - vfu_log(vfu_ctx, LOG_ERR, "migration not enabled"); - ret = ERROR_INT(EINVAL); - goto out; - } - - ret = migration_region_access(vfu_ctx, buf, count, offset, is_write); } else { vfu_region_access_cb_t *cb = vfu_ctx->reg_info[region].cb; @@ -293,8 +270,7 @@ is_valid_region_access(vfu_ctx_t *vfu_ctx, size_t size, uint16_t cmd, return false; } - if (unlikely(device_is_stopped_and_copying(vfu_ctx->migration) && - index != VFU_PCI_DEV_MIGR_REGION_IDX)) { + if (unlikely(device_is_stopped_and_copying(vfu_ctx->migration))) { vfu_log(vfu_ctx, LOG_ERR, "cannot access region %zu while device in stop-and-copy state", index); @@ -421,8 +397,7 @@ handle_device_get_region_info(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg) vfu_reg = &vfu_ctx->reg_info[in_info->index]; if (vfu_reg->size > 0) { - caps_size = get_vfio_caps_size(in_info->index == VFU_PCI_DEV_MIGR_REGION_IDX, - vfu_reg); + caps_size = get_vfio_caps_size(vfu_reg); } msg->out.iov.iov_len = MIN(sizeof(*out_info) + caps_size, in_info->argsz); @@ -457,9 +432,8 @@ handle_device_get_region_info(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg) /* Only actually provide the caps if they fit. */ if (in_info->argsz >= out_info->argsz) { out_info->flags |= VFIO_REGION_INFO_FLAG_CAPS; - ret = dev_get_caps(vfu_ctx, vfu_reg, - in_info->index == VFU_PCI_DEV_MIGR_REGION_IDX, - out_info, &msg->out.fds, &msg->out.nr_fds); + ret = dev_get_caps(vfu_ctx, vfu_reg, out_info, &msg->out.fds, + &msg->out.nr_fds); if (ret < 0) { return ret; } @@ -920,15 +894,14 @@ static int device_reset(vfu_ctx_t *vfu_ctx, vfu_reset_type_t reason) { int ret; - + ret = call_reset_cb(vfu_ctx, reason); if (ret < 0) { return ret; } if (vfu_ctx->migration != NULL) { - return handle_device_state(vfu_ctx, vfu_ctx->migration, - VFIO_DEVICE_STATE_V1_RUNNING, false); + migr_state_transition(vfu_ctx->migration, VFIO_DEVICE_STATE_RUNNING); } return 0; } @@ -1052,6 +1025,48 @@ handle_dirty_pages(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg) return ret; } +static int +handle_device_feature(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg) +{ + struct vfio_user_device_feature *req = msg->in.iov.iov_base; + + if (vfu_ctx->migration == NULL) { + return -EINVAL; + } + + if (!migration_feature_supported(req->flags)) { + // FIXME what error code to return? we really want "not supported" + // instead of "not permitted"? + return -EINVAL; + } + + ssize_t ret; + + if (req->flags & VFIO_DEVICE_FEATURE_PROBE) { + msg->out.iov.iov_base = malloc(msg->in.iov.iov_len); + msg->out.iov.iov_len = msg->in.iov.iov_len; + memcpy(msg->out.iov.iov_base, msg->in.iov.iov_base, + msg->out.iov.iov_len); + + ret = 0; + } else if (req->flags & VFIO_DEVICE_FEATURE_GET) { + msg->out.iov.iov_base = calloc(8, 1); + msg->out.iov.iov_len = 8; + + ret = migration_feature_get(vfu_ctx, req->flags, + msg->out.iov.iov_base); + } else if (req->flags & VFIO_DEVICE_FEATURE_SET) { + msg->out.iov.iov_base = malloc(msg->in.iov.iov_len); + msg->out.iov.iov_len = msg->in.iov.iov_len; + memcpy(msg->out.iov.iov_base, msg->in.iov.iov_base, + msg->out.iov.iov_len); + + ret = migration_feature_set(vfu_ctx, req->flags, req->data); + } + + return ret; +} + static vfu_msg_t * alloc_msg(struct vfio_user_header *hdr, int *fds, size_t nr_fds) { @@ -1221,6 +1236,18 @@ handle_request(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg) } break; + case VFIO_USER_DEVICE_FEATURE: + ret = handle_device_feature(vfu_ctx, msg); + break; + + case VFIO_USER_MIG_DATA_READ: + ret = handle_mig_data_read(vfu_ctx, msg); + break; + + case VFIO_USER_MIG_DATA_WRITE: + ret = handle_mig_data_write(vfu_ctx, msg); + break; + default: msg->processed_cmd = false; vfu_log(vfu_ctx, LOG_ERR, "bad command %d", msg->hdr.cmd); @@ -1324,7 +1351,9 @@ MOCK_DEFINE(cmd_allowed_when_stopped_and_copying)(uint16_t cmd) { return cmd == VFIO_USER_REGION_READ || cmd == VFIO_USER_REGION_WRITE || - cmd == VFIO_USER_DIRTY_PAGES; + cmd == VFIO_USER_DIRTY_PAGES || + cmd == VFIO_USER_DEVICE_FEATURE || + cmd == VFIO_USER_MIG_DATA_READ; } bool @@ -1350,8 +1379,7 @@ static bool access_needs_quiesce(const vfu_ctx_t *vfu_ctx, size_t region_index, uint64_t offset) { - return access_migration_needs_quiesce(vfu_ctx, region_index, offset) - || access_is_pci_cap_exp(vfu_ctx, region_index, offset); + return access_is_pci_cap_exp(vfu_ctx, region_index, offset); } static bool @@ -1849,38 +1877,6 @@ copyin_mmap_areas(vfu_reg_info_t *reg_info, return 0; } -static bool -ranges_intersect(size_t off1, size_t size1, size_t off2, size_t size2) -{ - /* - * For two ranges to intersect, the start of each range must be before the - * end of the other range. - * TODO already defined in lib/pci_caps.c, maybe introduce a file for misc - * utility functions? - */ - return (off1 < (off2 + size2) && off2 < (off1 + size1)); -} - -static bool -maps_over_migr_regs(struct iovec *iov) -{ - return ranges_intersect(0, vfu_get_migr_register_area_size(), - (size_t)iov->iov_base, iov->iov_len); -} - -static bool -validate_sparse_mmaps_for_migr_reg(vfu_reg_info_t *reg) -{ - int i; - - for (i = 0; i < reg->nr_mmap_areas; i++) { - if (maps_over_migr_regs(®->mmap_areas[i])) { - return false; - } - } - return true; -} - EXPORT int vfu_setup_region(vfu_ctx_t *vfu_ctx, int region_idx, size_t size, vfu_region_access_cb_t *cb, int flags, @@ -1926,12 +1922,6 @@ vfu_setup_region(vfu_ctx_t *vfu_ctx, int region_idx, size_t size, return ERROR_INT(EINVAL); } - if (region_idx == VFU_PCI_DEV_MIGR_REGION_IDX && - size < vfu_get_migr_register_area_size()) { - vfu_log(vfu_ctx, LOG_ERR, "invalid migration region size %zu", size); - return ERROR_INT(EINVAL); - } - for (i = 0; i < nr_mmap_areas; i++) { struct iovec *iov = &mmap_areas[i]; if ((size_t)iov_end(iov) > size) { @@ -1963,15 +1953,6 @@ vfu_setup_region(vfu_ctx_t *vfu_ctx, int region_idx, size_t size, } } - if (region_idx == VFU_PCI_DEV_MIGR_REGION_IDX) { - if (!validate_sparse_mmaps_for_migr_reg(reg)) { - vfu_log(vfu_ctx, LOG_ERR, - "migration registers cannot be memory mapped"); - errno = EINVAL; - goto err; - } - } - return 0; err: @@ -2050,33 +2031,27 @@ vfu_setup_irq_state_callback(vfu_ctx_t *vfu_ctx, enum vfu_dev_irq_type type, } EXPORT int -vfu_setup_device_migration_callbacks(vfu_ctx_t *vfu_ctx, - const vfu_migration_callbacks_t *callbacks, - uint64_t data_offset) +vfu_setup_device_migration_callbacks(vfu_ctx_t *vfu_ctx, uint64_t flags, + const vfu_migration_callbacks_t *callbacks) { int ret = 0; assert(vfu_ctx != NULL); assert(callbacks != NULL); - if (vfu_ctx->reg_info[VFU_PCI_DEV_MIGR_REGION_IDX].size == 0) { - vfu_log(vfu_ctx, LOG_ERR, "no device migration region"); - return ERROR_INT(EINVAL); - } - if (callbacks->version != VFU_MIGR_CALLBACKS_VERS) { vfu_log(vfu_ctx, LOG_ERR, "unsupported migration callbacks version %d", callbacks->version); return ERROR_INT(EINVAL); } - vfu_ctx->migration = init_migration(callbacks, data_offset, &ret); + vfu_ctx->migration = init_migration(callbacks, flags, &ret); if (vfu_ctx->migration == NULL) { vfu_log(vfu_ctx, LOG_ERR, "failed to initialize device migration"); return ERROR_INT(ret); } - return 0; + return ret; } #ifdef DEBUG diff --git a/lib/migration.c b/lib/migration.c index 794e7b8..4ab9181 100644 --- a/lib/migration.c +++ b/lib/migration.c @@ -42,14 +42,7 @@ bool MOCK_DEFINE(vfio_migr_state_transition_is_valid)(uint32_t from, uint32_t to) { - return migr_states[from].state & (1 << to); -} - -EXPORT size_t -vfu_get_migr_register_area_size(void) -{ - return ROUND_UP(sizeof(struct vfio_user_migration_info), - sysconf(_SC_PAGE_SIZE)); + return transitions[from][to]; } /* @@ -57,22 +50,19 @@ vfu_get_migr_register_area_size(void) * in vfu_ctx_t. */ struct migration * -init_migration(const vfu_migration_callbacks_t * callbacks, - uint64_t data_offset, int *err) +init_migration(const vfu_migration_callbacks_t *callbacks, + uint64_t flags, int *err) { struct migration *migr; - if (data_offset < vfu_get_migr_register_area_size()) { - *err = EINVAL; - return NULL; - } - migr = calloc(1, sizeof(*migr)); if (migr == NULL) { *err = ENOMEM; return NULL; } + migr->flags = flags; + /* * FIXME: incorrect, if the client doesn't give a pgsize value, it means "no * migration support", handle this @@ -81,13 +71,14 @@ init_migration(const vfu_migration_callbacks_t * callbacks, migr->pgsize = sysconf(_SC_PAGESIZE); /* FIXME this should be done in vfu_ctx_realize */ - migr->info.device_state = VFIO_DEVICE_STATE_V1_RUNNING; - migr->data_offset = data_offset; + if (flags & LIBVFIO_USER_MIG_FLAG_START_RESUMING) { + migr->state = VFIO_DEVICE_STATE_RESUMING; + } else { + migr->state = VFIO_DEVICE_STATE_RUNNING; + } migr->callbacks = *callbacks; if (migr->callbacks.transition == NULL || - migr->callbacks.get_pending_bytes == NULL || - migr->callbacks.prepare_data == NULL || migr->callbacks.read_data == NULL || migr->callbacks.write_data == NULL) { free(migr); @@ -100,35 +91,30 @@ init_migration(const vfu_migration_callbacks_t * callbacks, void MOCK_DEFINE(migr_state_transition)(struct migration *migr, - enum migr_iter_state state) + enum vfio_device_mig_state state) { assert(migr != NULL); /* FIXME validate that state transition */ - migr->iter.state = state; + migr->state = state; } vfu_migr_state_t -MOCK_DEFINE(migr_state_vfio_to_vfu)(uint32_t device_state) +MOCK_DEFINE(migr_state_vfio_to_vfu)(enum vfio_device_mig_state state) { - switch (device_state) { - case VFIO_DEVICE_STATE_V1_STOP: + switch (state) { + case VFIO_DEVICE_STATE_STOP: return VFU_MIGR_STATE_STOP; - case VFIO_DEVICE_STATE_V1_RUNNING: + case VFIO_DEVICE_STATE_RUNNING: return VFU_MIGR_STATE_RUNNING; - case VFIO_DEVICE_STATE_V1_SAVING: - /* - * FIXME How should the device operate during the stop-and-copy - * phase? Should we only allow the migration data to be read from - * the migration region? E.g. Access to any other region should be - * failed? This might be a good question to send to LKML. - */ + case VFIO_DEVICE_STATE_STOP_COPY: return VFU_MIGR_STATE_STOP_AND_COPY; - case VFIO_DEVICE_STATE_V1_RUNNING | VFIO_DEVICE_STATE_V1_SAVING: - return VFU_MIGR_STATE_PRE_COPY; - case VFIO_DEVICE_STATE_V1_RESUMING: + case VFIO_DEVICE_STATE_RESUMING: return VFU_MIGR_STATE_RESUME; + case VFIO_DEVICE_STATE_PRE_COPY: + return VFU_MIGR_STATE_PRE_COPY; + default: + return -1; } - return -1; } /** @@ -165,8 +151,7 @@ MOCK_DEFINE(migr_trans_to_valid_state)(vfu_ctx_t *vfu_ctx, struct migration *mig return ret; } } - migr->info.device_state = device_state; - migr_state_transition(migr, VFIO_USER_MIGR_ITER_STATE_INITIAL); + migr_state_transition(migr, device_state); // TODO confused return 0; } @@ -180,370 +165,128 @@ MOCK_DEFINE(handle_device_state)(vfu_ctx_t *vfu_ctx, struct migration *migr, assert(migr != NULL); - if (!vfio_migr_state_transition_is_valid(migr->info.device_state, - device_state)) { + if (!vfio_migr_state_transition_is_valid(migr->state, device_state)) { return ERROR_INT(EINVAL); } return migr_trans_to_valid_state(vfu_ctx, migr, device_state, notify); } -/** - * Returns 0 on success, -1 on error setting errno. - */ -static ssize_t -handle_pending_bytes(vfu_ctx_t *vfu_ctx, struct migration *migr, - uint64_t *pending_bytes, bool is_write) -{ - assert(migr != NULL); - assert(pending_bytes != NULL); - - if (is_write) { - return ERROR_INT(EINVAL); - } - - if (migr->iter.state == VFIO_USER_MIGR_ITER_STATE_FINISHED) { - *pending_bytes = 0; - return 0; - } - - switch (migr->iter.state) { - case VFIO_USER_MIGR_ITER_STATE_INITIAL: - case VFIO_USER_MIGR_ITER_STATE_DATA_PREPARED: - /* - * FIXME what happens if data haven't been consumed in the previous - * iteration? Check https://www.spinics.net/lists/kvm/msg228608.html. - */ - *pending_bytes = migr->iter.pending_bytes = migr->callbacks.get_pending_bytes(vfu_ctx); - - if (*pending_bytes == 0) { - migr_state_transition(migr, VFIO_USER_MIGR_ITER_STATE_FINISHED); - } else { - migr_state_transition(migr, VFIO_USER_MIGR_ITER_STATE_STARTED); - } - break; - case VFIO_USER_MIGR_ITER_STATE_STARTED: - /* - * FIXME We might be wrong returning a cached value, check - * https://www.spinics.net/lists/kvm/msg228608.html - * - */ - *pending_bytes = migr->iter.pending_bytes; - break; +bool +migration_feature_supported(uint32_t flags) { + switch (flags & VFIO_DEVICE_FEATURE_MASK) { + case VFIO_DEVICE_FEATURE_MIGRATION: + return !(flags & VFIO_DEVICE_FEATURE_SET); // not supported for set + case VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE: + return true; default: - return ERROR_INT(EINVAL); - } - return 0; + return false; + }; } -/* - * FIXME reading or writing migration registers with the wrong device state or - * out of sequence is undefined, but should not result in EINVAL, it should - * simply be ignored. However this way it's easier to catch development errors. - * Make this behavior conditional. - */ - -/** - * Returns 0 on success, -1 on error setting errno. - */ -static ssize_t -handle_data_offset_when_saving(vfu_ctx_t *vfu_ctx, struct migration *migr, - bool is_write) +ssize_t +migration_feature_get(vfu_ctx_t *vfu_ctx, uint32_t flags, void *buf) { - int ret = 0; + struct vfio_user_device_feature_migration *res; + struct vfio_user_device_feature_mig_state *state; - assert(migr != NULL); - - if (is_write) { - vfu_log(vfu_ctx, LOG_ERR, "data_offset is RO when saving"); - return ERROR_INT(EINVAL); - } + switch (flags & VFIO_DEVICE_FEATURE_MASK) { + case VFIO_DEVICE_FEATURE_MIGRATION: + res = buf; + // FIXME are these always supported? Can we consider to be + // "supported" if said support is just an empty callback? + res->flags = VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_PRE_COPY; - switch (migr->iter.state) { - case VFIO_USER_MIGR_ITER_STATE_STARTED: - ret = migr->callbacks.prepare_data(vfu_ctx, &migr->iter.offset, - &migr->iter.size); - if (ret != 0) { - return ret; - } - /* - * FIXME must first read data_offset and then data_size. They way we've - * implemented it now, if data_size is read before data_offset we - * transition to state VFIO_USER_MIGR_ITER_STATE_DATA_PREPARED without - * calling callbacks.prepare_data, which is wrong. Maybe we need - * separate states for data_offset and data_size. - */ - migr_state_transition(migr, VFIO_USER_MIGR_ITER_STATE_DATA_PREPARED); - break; - case VFIO_USER_MIGR_ITER_STATE_DATA_PREPARED: - /* - * data_offset is invariant during a save iteration. - */ - break; - default: - vfu_log(vfu_ctx, LOG_ERR, - "reading data_offset out of sequence is undefined"); - return ERROR_INT(EINVAL); - } + return 0; - return 0; -} + case VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE: + state = buf; + state->device_state = vfu_ctx->migration->state; -/** - * Returns 0 on success, -1 on error setting errno. - */ -static ssize_t -handle_data_offset(vfu_ctx_t *vfu_ctx, struct migration *migr, - uint64_t *offset, bool is_write) -{ - int ret; + return 0; - assert(migr != NULL); - assert(offset != NULL); - - switch (migr->info.device_state) { - case VFIO_DEVICE_STATE_V1_SAVING: - case VFIO_DEVICE_STATE_V1_RUNNING | VFIO_DEVICE_STATE_V1_SAVING: - ret = handle_data_offset_when_saving(vfu_ctx, migr, is_write); - if (ret == 0 && !is_write) { - *offset = migr->iter.offset + migr->data_offset; - } - return ret; - case VFIO_DEVICE_STATE_V1_RESUMING: - if (is_write) { - /* TODO writing to read-only registers should be simply ignored */ - vfu_log(vfu_ctx, LOG_ERR, "bad write to migration data_offset"); - return ERROR_INT(EINVAL); - } - ret = migr->callbacks.prepare_data(vfu_ctx, offset, NULL); - if (ret != 0) { - return ret; - } - *offset += migr->data_offset; - return 0; - } - /* TODO improve error message */ - vfu_log(vfu_ctx, LOG_ERR, - "bad access to migration data_offset in state %s", - migr_states[migr->info.device_state].name); - return ERROR_INT(EINVAL); + default: + return -EINVAL; + }; } -/** - * Returns 0 on success, -1 on failure setting errno. - */ -static ssize_t -handle_data_size_when_saving(vfu_ctx_t *vfu_ctx, struct migration *migr, - bool is_write) +ssize_t +migration_feature_set(vfu_ctx_t *vfu_ctx, uint32_t flags, void *buf) { - assert(migr != NULL); - - if (is_write) { - /* TODO improve error message */ - vfu_log(vfu_ctx, LOG_ERR, "data_size is RO when saving"); - return ERROR_INT(EINVAL); + if (flags & VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE) { + struct vfio_user_device_feature_mig_state *res = buf; + struct migration *migr = vfu_ctx->migration; + + return handle_device_state(vfu_ctx, migr, res->device_state, true); } - if (migr->iter.state != VFIO_USER_MIGR_ITER_STATE_STARTED && - migr->iter.state != VFIO_USER_MIGR_ITER_STATE_DATA_PREPARED) { - vfu_log(vfu_ctx, LOG_ERR, - "reading data_size ouf of sequence is undefined"); - return ERROR_INT(EINVAL); - } - return 0; + return -EINVAL; } -/** - * Returns 0 on success, -1 on error setting errno. - */ -static ssize_t -handle_data_size_when_resuming(vfu_ctx_t *vfu_ctx, struct migration *migr, - uint64_t size, bool is_write) +ssize_t +handle_mig_data_read(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg) { - assert(migr != NULL); + struct migration *migr = vfu_ctx->migration; + struct vfio_user_mig_data_without_data *req = msg->in.iov.iov_base; - if (is_write) { - return migr->callbacks.data_written(vfu_ctx, size); + if (vfu_ctx->migration == NULL) { + return -EINVAL; } - return 0; -} -/** - * Returns 0 on success, -1 on failure setting errno. - */ -static ssize_t -handle_data_size(vfu_ctx_t *vfu_ctx, struct migration *migr, - uint64_t *size, bool is_write) -{ - int ret; + if (migr->state != VFIO_DEVICE_STATE_PRE_COPY + && migr->state != VFIO_DEVICE_STATE_STOP_COPY) { + return -EINVAL; + } - assert(vfu_ctx != NULL); - assert(size != NULL); + msg->out.iov.iov_len = msg->in.iov.iov_len + req->size; + msg->out.iov.iov_base = calloc(1, msg->out.iov.iov_len); - switch (migr->info.device_state){ - case VFIO_DEVICE_STATE_V1_SAVING: - case VFIO_DEVICE_STATE_V1_RUNNING | VFIO_DEVICE_STATE_V1_SAVING: - ret = handle_data_size_when_saving(vfu_ctx, migr, is_write); - if (ret == 0 && !is_write) { - *size = migr->iter.size; - } - return ret; - case VFIO_DEVICE_STATE_V1_RESUMING: - return handle_data_size_when_resuming(vfu_ctx, migr, *size, is_write); - } - /* TODO improve error message */ - vfu_log(vfu_ctx, LOG_ERR, "bad access to data_size"); - return ERROR_INT(EINVAL); -} + struct vfio_user_mig_data_with_data *res = msg->out.iov.iov_base; -/** - * Returns 0 on success, -1 on failure setting errno. - */ -ssize_t -MOCK_DEFINE(migration_region_access_registers)(vfu_ctx_t *vfu_ctx, char *buf, - size_t count, loff_t pos, - bool is_write) -{ - struct migration *migr = vfu_ctx->migration; - int ret; - uint32_t *device_state, old_device_state; + ssize_t ret = migr->callbacks.read_data(vfu_ctx, &res->data, req->size); - assert(migr != NULL); + res->size = ret; + res->argsz = ret + msg->in.iov.iov_len; - switch (pos) { - case offsetof(struct vfio_user_migration_info, device_state): - if (count != sizeof(migr->info.device_state)) { - vfu_log(vfu_ctx, LOG_ERR, - "bad device_state access size %zu", count); - return ERROR_INT(EINVAL); - } - device_state = (uint32_t *)buf; - if (!is_write) { - *device_state = migr->info.device_state; - return 0; - } - old_device_state = migr->info.device_state; - vfu_log(vfu_ctx, LOG_DEBUG, - "migration: transitioning from state %s to state %s", - migr_states[old_device_state].name, - migr_states[*device_state].name); - - ret = handle_device_state(vfu_ctx, migr, *device_state, true); - if (ret == 0) { - vfu_log(vfu_ctx, LOG_DEBUG, - "migration: transitioned from state %s to state %s", - migr_states[old_device_state].name, - migr_states[*device_state].name); - } else { - vfu_log(vfu_ctx, LOG_ERR, - "migration: failed to transition from state %s to state %s", - migr_states[old_device_state].name, - migr_states[*device_state].name); - } - break; - case offsetof(struct vfio_user_migration_info, pending_bytes): - if (count != sizeof(migr->info.pending_bytes)) { - vfu_log(vfu_ctx, LOG_ERR, - "bad pending_bytes access size %zu", count); - return ERROR_INT(EINVAL); - } - ret = handle_pending_bytes(vfu_ctx, migr, (uint64_t *)buf, is_write); - break; - case offsetof(struct vfio_user_migration_info, data_offset): - if (count != sizeof(migr->info.data_offset)) { - vfu_log(vfu_ctx, LOG_ERR, - "bad data_offset access size %zu", count); - return ERROR_INT(EINVAL); - } - ret = handle_data_offset(vfu_ctx, migr, (uint64_t *)buf, is_write); - break; - case offsetof(struct vfio_user_migration_info, data_size): - if (count != sizeof(migr->info.data_size)) { - vfu_log(vfu_ctx, LOG_ERR, - "bad data_size access size %zu", count); - return ERROR_INT(EINVAL); - } - ret = handle_data_size(vfu_ctx, migr, (uint64_t *)buf, is_write); - break; - default: - vfu_log(vfu_ctx, LOG_ERR, - "bad migration region register offset %#llx", - (ull_t)pos); - return ERROR_INT(EINVAL); + if (ret < 0) { + return -1; } + return ret; } ssize_t -migration_region_access(vfu_ctx_t *vfu_ctx, char *buf, size_t count, - loff_t pos, bool is_write) +handle_mig_data_write(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg) { struct migration *migr = vfu_ctx->migration; - ssize_t ret; + struct vfio_user_mig_data_with_data *req = msg->in.iov.iov_base; - assert(migr != NULL); - assert(buf != NULL); - - /* - * FIXME don't call the device callback if the migration state is in not in - * pre-copy/stop-and-copy/resuming state, since the behavior is undefined - * in that case. - */ + if (vfu_ctx->migration == NULL) { + return -EINVAL; + } - if (pos + count <= sizeof(struct vfio_user_migration_info)) { - ret = migration_region_access_registers(vfu_ctx, buf, count, - pos, is_write); - if (ret != 0) { - return ret; - } - } else { + if (migr->state != VFIO_DEVICE_STATE_RESUMING) { + return -EINVAL; + } - if (pos < (loff_t)migr->data_offset) { - /* - * TODO we can simply ignore the access to that part and handle - * any access to the data region properly. - */ - vfu_log(vfu_ctx, LOG_WARNING, - "bad access to dead space %#llx - %#llx in migration region", - (ull_t)pos, - (ull_t)(pos + count - 1)); - return ERROR_INT(EINVAL); - } + ssize_t ret = migr->callbacks.write_data(vfu_ctx, &req->data, req->size); - pos -= migr->data_offset; - if (is_write) { - ret = migr->callbacks.write_data(vfu_ctx, buf, count, pos); - if (ret < 0) { - return -1; - } - } else { - /* - * FIXME <linux/vfio.h> says: - * - * d. Read data_size bytes of data from (region + data_offset) from the - * migration region. - * - * Does this mean that partial reads are not allowed? - */ - ret = migr->callbacks.read_data(vfu_ctx, buf, count, pos); - if (ret < 0) { - return -1; - } - } + if (ret < 0) { + return -1; } - return count; + return ret; } bool MOCK_DEFINE(device_is_stopped_and_copying)(struct migration *migr) { - return migr != NULL && migr->info.device_state == VFIO_DEVICE_STATE_V1_SAVING; + return migr != NULL && migr->state == VFIO_DEVICE_STATE_STOP_COPY; } bool MOCK_DEFINE(device_is_stopped)(struct migration *migr) { - return migr != NULL && migr->info.device_state == VFIO_DEVICE_STATE_V1_STOP; + return migr != NULL && migr->state == VFIO_DEVICE_STATE_STOP; } size_t @@ -568,18 +311,4 @@ migration_set_pgsize(struct migration *migr, size_t pgsize) return 0; } -bool -access_migration_needs_quiesce(const vfu_ctx_t *vfu_ctx, size_t region_index, - uint64_t offset) -{ - /* - * Writing to the migration state register with an unaligned access won't - * trigger this check but that's not a problem because - * migration_region_access_registers will fail the access. - */ - return region_index == VFU_PCI_DEV_MIGR_REGION_IDX - && vfu_ctx->migration != NULL - && offset == offsetof(struct vfio_user_migration_info, device_state); -} - /* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/lib/migration.h b/lib/migration.h index 26fd744..f817f66 100644 --- a/lib/migration.h +++ b/lib/migration.h @@ -46,11 +46,22 @@ struct migration * init_migration(const vfu_migration_callbacks_t *callbacks, - uint64_t data_offset, int *err); + uint64_t flags, int *err); + +bool +migration_feature_supported(uint32_t flags); + +ssize_t +migration_feature_get(vfu_ctx_t *vfu_ctx, uint32_t flags, void *buf); + +ssize_t +migration_feature_set(vfu_ctx_t *vfu_ctx, uint32_t flags, void *buf); + +ssize_t +handle_mig_data_read(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg); ssize_t -migration_region_access(vfu_ctx_t *vfu_ctx, char *buf, size_t count, - loff_t pos, bool is_write); +handle_mig_data_write(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg); bool migration_available(vfu_ctx_t *vfu_ctx); @@ -65,6 +76,12 @@ migration_get_pgsize(struct migration *migr); int migration_set_pgsize(struct migration *migr, size_t pgsize); +uint64_t +migration_get_flags(struct migration *migr); + +MOCK_DECLARE(void, migr_state_transition, struct migration *migr, + enum vfio_device_mig_state state); + MOCK_DECLARE(bool, vfio_migr_state_transition_is_valid, uint32_t from, uint32_t to); diff --git a/lib/migration_priv.h b/lib/migration_priv.h index d5643af..ac58b1f 100644 --- a/lib/migration_priv.h +++ b/lib/migration_priv.h @@ -33,94 +33,26 @@ #include <linux/vfio.h> -/* - * FSM to simplify saving device state. - */ -enum migr_iter_state { - VFIO_USER_MIGR_ITER_STATE_INITIAL, - VFIO_USER_MIGR_ITER_STATE_STARTED, - VFIO_USER_MIGR_ITER_STATE_DATA_PREPARED, - VFIO_USER_MIGR_ITER_STATE_FINISHED -}; - struct migration { - /* - * TODO if the user provides an FD then should mmap it and use the migration - * registers in the file - */ - struct vfio_user_migration_info info; + uint64_t flags; + enum vfio_device_mig_state state; size_t pgsize; vfu_migration_callbacks_t callbacks; - uint64_t data_offset; - - /* - * This is only for the saving state. The resuming state is simpler so we - * don't need it. - */ - struct { - enum migr_iter_state state; - uint64_t pending_bytes; - uint64_t offset; - uint64_t size; - } iter; }; -struct migr_state_data { - uint32_t state; - const char *name; +/* valid migration state transitions + indexed by vfio_device_mig_state enum */ +static const bool transitions[8][8] = { + {0, 0, 0, 0, 0, 0, 0, 0}, // ERROR + {0, 0, 1, 1, 1, 0, 0, 0}, // STOP + {0, 1, 0, 0, 0, 0, 1, 0}, // RUNNING + {0, 1, 0, 0, 0, 0, 0, 0}, // STOP_COPY + {0, 1, 0, 0, 0, 0, 0, 0}, // RESUMING + {0, 0, 0, 0, 0, 0, 0, 0}, // RUNNING_P2P + {0, 0, 1, 1, 0, 0, 0, 0}, // PRE_COPY + {0, 0, 0, 0, 0, 0, 0, 0} // PRE_COPY_P2P }; -#define VFIO_DEVICE_STATE_V1_ERROR (VFIO_DEVICE_STATE_V1_SAVING | VFIO_DEVICE_STATE_V1_RESUMING) - -/* valid migration state transitions */ -static const struct migr_state_data migr_states[(VFIO_DEVICE_STATE_MASK + 1)] = { - [VFIO_DEVICE_STATE_V1_STOP] = { - .state = - (1 << VFIO_DEVICE_STATE_V1_STOP) | - (1 << VFIO_DEVICE_STATE_V1_RUNNING), - .name = "stopped" - }, - [VFIO_DEVICE_STATE_V1_RUNNING] = { - .state = - (1 << VFIO_DEVICE_STATE_V1_STOP) | - (1 << VFIO_DEVICE_STATE_V1_RUNNING) | - (1 << VFIO_DEVICE_STATE_V1_SAVING) | - (1 << (VFIO_DEVICE_STATE_V1_RUNNING | VFIO_DEVICE_STATE_V1_SAVING)) | - (1 << VFIO_DEVICE_STATE_V1_RESUMING) | - (1 << VFIO_DEVICE_STATE_V1_ERROR), - .name = "running" - }, - [VFIO_DEVICE_STATE_V1_SAVING] = { - .state = - (1 << VFIO_DEVICE_STATE_V1_STOP) | - (1 << VFIO_DEVICE_STATE_V1_RUNNING) | - (1 << VFIO_DEVICE_STATE_V1_SAVING) | - (1 << VFIO_DEVICE_STATE_V1_ERROR), - .name = "stop-and-copy" - }, - [VFIO_DEVICE_STATE_V1_RUNNING | VFIO_DEVICE_STATE_V1_SAVING] = { - .state = - (1 << VFIO_DEVICE_STATE_V1_STOP) | - (1 << VFIO_DEVICE_STATE_V1_SAVING) | - (1 << VFIO_DEVICE_STATE_V1_RUNNING | VFIO_DEVICE_STATE_V1_SAVING) | - (1 << VFIO_DEVICE_STATE_V1_ERROR), - .name = "pre-copy" - }, - [VFIO_DEVICE_STATE_V1_RESUMING] = { - .state = - (1 << VFIO_DEVICE_STATE_V1_RUNNING) | - (1 << VFIO_DEVICE_STATE_V1_RESUMING) | - (1 << VFIO_DEVICE_STATE_V1_ERROR), - .name = "resuming" - } -}; - -MOCK_DECLARE(ssize_t, migration_region_access_registers, vfu_ctx_t *vfu_ctx, - char *buf, size_t count, loff_t pos, bool is_write); - -MOCK_DECLARE(void, migr_state_transition, struct migration *migr, - enum migr_iter_state state); - MOCK_DECLARE(vfu_migr_state_t, migr_state_vfio_to_vfu, uint32_t device_state); MOCK_DECLARE(int, state_trans_notify, vfu_ctx_t *vfu_ctx, @@ -129,4 +61,4 @@ MOCK_DECLARE(int, state_trans_notify, vfu_ctx_t *vfu_ctx, #endif -/* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ +/* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */
\ No newline at end of file |