/*
 * Copyright (c) 2020 Nutanix Inc. All rights reserved.
 *
 * Authors: Thanos Makatos
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of Nutanix nor the names of its contributors may be
 *       used to endorse or promote products derived from this software without
 *       specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <assert.h>
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>

#include "common.h"
#include "migration.h"
#include "private.h"
#include "migration_priv.h"

/*
 * This defines valid migration state transitions. Each element in the array
 * corresponds to a FROM state and each bit of the element to a TO state. If
 * the bit is set, then the transition is allowed.
 *
 * The indices of each state are those in the vfio_user_device_mig_state enum.
 */
static const char transitions[VFIO_USER_DEVICE_NUM_STATES] = {
    [VFIO_USER_DEVICE_STATE_ERROR] = 0,
    [VFIO_USER_DEVICE_STATE_STOP] = (1 << VFIO_USER_DEVICE_STATE_RUNNING) |
                                    (1 << VFIO_USER_DEVICE_STATE_STOP_COPY) |
                                    (1 << VFIO_USER_DEVICE_STATE_RESUMING),
    [VFIO_USER_DEVICE_STATE_RUNNING] = (1 << VFIO_USER_DEVICE_STATE_STOP) |
                                       (1 << VFIO_USER_DEVICE_STATE_PRE_COPY),
    [VFIO_USER_DEVICE_STATE_STOP_COPY] = 1 << VFIO_USER_DEVICE_STATE_STOP,
    [VFIO_USER_DEVICE_STATE_RESUMING] = 1 << VFIO_USER_DEVICE_STATE_STOP,
    [VFIO_USER_DEVICE_STATE_RUNNING_P2P] = 0,
    [VFIO_USER_DEVICE_STATE_PRE_COPY] = (1 << VFIO_USER_DEVICE_STATE_RUNNING) |
                                        (1 << VFIO_USER_DEVICE_STATE_STOP_COPY),
    [VFIO_USER_DEVICE_STATE_PRE_COPY_P2P] = 0
};
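/*
 * Example (illustrative only, not used by the code): a transition is allowed
 * exactly when the TO state's bit is set in the FROM state's entry, which is
 * the bit test performed by vfio_migr_state_transition_is_valid() below.
 * STOP -> RUNNING is permitted because
 *
 *     (transitions[VFIO_USER_DEVICE_STATE_STOP] &
 *      (1 << VFIO_USER_DEVICE_STATE_RUNNING)) != 0
 *
 * evaluates to true, whereas STOP_COPY -> PRE_COPY is rejected because that
 * bit is clear in transitions[VFIO_USER_DEVICE_STATE_STOP_COPY].
 */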
/*
 * The spec dictates that, if no direct transition is allowed, and the
 * transition is not one of the explicitly disallowed ones (i.e. anything to
 * ERROR, anything from ERROR, and STOP_COPY -> PRE_COPY), we should take the
 * shortest allowed path.
 *
 * This can be indexed as `next_state[current][target] == next`. If next is
 * ERROR, then the transition is not allowed.
 */
static const uint32_t
next_state[VFIO_USER_DEVICE_NUM_STATES][VFIO_USER_DEVICE_NUM_STATES] = {
    [VFIO_USER_DEVICE_STATE_ERROR] = { 0, 0, 0, 0, 0, 0, 0, 0 },
    [VFIO_USER_DEVICE_STATE_STOP] = {
        [VFIO_USER_DEVICE_STATE_ERROR] = VFIO_USER_DEVICE_STATE_ERROR,
        [VFIO_USER_DEVICE_STATE_STOP] = VFIO_USER_DEVICE_STATE_STOP,
        [VFIO_USER_DEVICE_STATE_RUNNING] = VFIO_USER_DEVICE_STATE_RUNNING,
        [VFIO_USER_DEVICE_STATE_STOP_COPY] = VFIO_USER_DEVICE_STATE_STOP_COPY,
        [VFIO_USER_DEVICE_STATE_RESUMING] = VFIO_USER_DEVICE_STATE_RESUMING,
        [VFIO_USER_DEVICE_STATE_RUNNING_P2P] = VFIO_USER_DEVICE_STATE_ERROR,
        [VFIO_USER_DEVICE_STATE_PRE_COPY] = VFIO_USER_DEVICE_STATE_RUNNING,
        [VFIO_USER_DEVICE_STATE_PRE_COPY_P2P] = VFIO_USER_DEVICE_STATE_ERROR,
    },
    [VFIO_USER_DEVICE_STATE_RUNNING] = {
        [VFIO_USER_DEVICE_STATE_ERROR] = VFIO_USER_DEVICE_STATE_ERROR,
        [VFIO_USER_DEVICE_STATE_STOP] = VFIO_USER_DEVICE_STATE_STOP,
        [VFIO_USER_DEVICE_STATE_RUNNING] = VFIO_USER_DEVICE_STATE_RUNNING,
        [VFIO_USER_DEVICE_STATE_STOP_COPY] = VFIO_USER_DEVICE_STATE_STOP,
        [VFIO_USER_DEVICE_STATE_RESUMING] = VFIO_USER_DEVICE_STATE_STOP,
        [VFIO_USER_DEVICE_STATE_RUNNING_P2P] = VFIO_USER_DEVICE_STATE_ERROR,
        [VFIO_USER_DEVICE_STATE_PRE_COPY] = VFIO_USER_DEVICE_STATE_PRE_COPY,
        [VFIO_USER_DEVICE_STATE_PRE_COPY_P2P] = VFIO_USER_DEVICE_STATE_ERROR,
    },
    [VFIO_USER_DEVICE_STATE_STOP_COPY] = {
        [VFIO_USER_DEVICE_STATE_ERROR] = VFIO_USER_DEVICE_STATE_ERROR,
        [VFIO_USER_DEVICE_STATE_STOP] = VFIO_USER_DEVICE_STATE_STOP,
        [VFIO_USER_DEVICE_STATE_RUNNING] = VFIO_USER_DEVICE_STATE_STOP,
        [VFIO_USER_DEVICE_STATE_STOP_COPY] = VFIO_USER_DEVICE_STATE_STOP_COPY,
        [VFIO_USER_DEVICE_STATE_RESUMING] = VFIO_USER_DEVICE_STATE_STOP,
        [VFIO_USER_DEVICE_STATE_RUNNING_P2P] = VFIO_USER_DEVICE_STATE_ERROR,
        [VFIO_USER_DEVICE_STATE_PRE_COPY] = VFIO_USER_DEVICE_STATE_ERROR,
        [VFIO_USER_DEVICE_STATE_PRE_COPY_P2P] = VFIO_USER_DEVICE_STATE_ERROR,
    },
    [VFIO_USER_DEVICE_STATE_RESUMING] = {
        [VFIO_USER_DEVICE_STATE_ERROR] = VFIO_USER_DEVICE_STATE_ERROR,
        [VFIO_USER_DEVICE_STATE_STOP] = VFIO_USER_DEVICE_STATE_STOP,
        [VFIO_USER_DEVICE_STATE_RUNNING] = VFIO_USER_DEVICE_STATE_STOP,
        [VFIO_USER_DEVICE_STATE_STOP_COPY] = VFIO_USER_DEVICE_STATE_STOP,
        [VFIO_USER_DEVICE_STATE_RESUMING] = VFIO_USER_DEVICE_STATE_RESUMING,
        [VFIO_USER_DEVICE_STATE_RUNNING_P2P] = VFIO_USER_DEVICE_STATE_ERROR,
        [VFIO_USER_DEVICE_STATE_PRE_COPY] = VFIO_USER_DEVICE_STATE_STOP,
        [VFIO_USER_DEVICE_STATE_PRE_COPY_P2P] = VFIO_USER_DEVICE_STATE_ERROR,
    },
    [VFIO_USER_DEVICE_STATE_RUNNING_P2P] = { 0, 0, 0, 0, 0, 0, 0, 0 },
    [VFIO_USER_DEVICE_STATE_PRE_COPY] = {
        [VFIO_USER_DEVICE_STATE_ERROR] = VFIO_USER_DEVICE_STATE_ERROR,
        [VFIO_USER_DEVICE_STATE_STOP] = VFIO_USER_DEVICE_STATE_RUNNING,
        [VFIO_USER_DEVICE_STATE_RUNNING] = VFIO_USER_DEVICE_STATE_RUNNING,
        [VFIO_USER_DEVICE_STATE_STOP_COPY] = VFIO_USER_DEVICE_STATE_STOP_COPY,
        [VFIO_USER_DEVICE_STATE_RESUMING] = VFIO_USER_DEVICE_STATE_RUNNING,
        [VFIO_USER_DEVICE_STATE_RUNNING_P2P] = VFIO_USER_DEVICE_STATE_ERROR,
        [VFIO_USER_DEVICE_STATE_PRE_COPY] = VFIO_USER_DEVICE_STATE_PRE_COPY,
        [VFIO_USER_DEVICE_STATE_PRE_COPY_P2P] = VFIO_USER_DEVICE_STATE_ERROR,
    },
    [VFIO_USER_DEVICE_STATE_PRE_COPY_P2P] = { 0, 0, 0, 0, 0, 0, 0, 0 },
};

bool
MOCK_DEFINE(vfio_migr_state_transition_is_valid)(uint32_t from, uint32_t to)
{
    return from < VFIO_USER_DEVICE_NUM_STATES &&
           to < VFIO_USER_DEVICE_NUM_STATES &&
           (transitions[from] & (1 << to)) != 0;
}
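/*
 * Worked example (VFIO_USER_DEVICE_STATE_* prefixes abbreviated): a client
 * asking a RUNNING device to enter STOP_COPY has no direct arc in
 * `transitions`, so migration_set_state() below walks `next_state` one hop
 * at a time:
 *
 *     next_state[RUNNING][STOP_COPY] == STOP
 *     next_state[STOP][STOP_COPY]    == STOP_COPY
 *
 * i.e. the device is notified of RUNNING -> STOP followed by
 * STOP -> STOP_COPY, both of which are valid direct transitions.
 */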
/*
 * TODO no need to dynamically allocate memory, we can keep struct migration
 * in vfu_ctx_t.
 */
struct migration *
init_migration(const vfu_migration_callbacks_t *callbacks, int *err)
{
    struct migration *migr;

    migr = calloc(1, sizeof(*migr));
    if (migr == NULL) {
        *err = ENOMEM;
        return NULL;
    }

    /*
     * FIXME: incorrect, if the client doesn't give a pgsize value, it means
     * "no migration support", handle this
     * FIXME must be available even if migration callbacks aren't used
     */
    migr->pgsize = sysconf(_SC_PAGESIZE);

    /* FIXME this should be done in vfu_ctx_realize */
    migr->state = VFIO_USER_DEVICE_STATE_RUNNING;

    migr->callbacks = *callbacks;
    if (migr->callbacks.transition == NULL ||
        migr->callbacks.read_data == NULL ||
        migr->callbacks.write_data == NULL ||
        migr->callbacks.version != VFU_MIGR_CALLBACKS_VERS) {
        free(migr);
        *err = EINVAL;
        return NULL;
    }

    return migr;
}

void
MOCK_DEFINE(migr_state_transition)(struct migration *migr,
                                   enum vfio_user_device_mig_state state)
{
    assert(migr != NULL);
    migr->state = state;
}

vfu_migr_state_t
MOCK_DEFINE(migr_state_vfio_to_vfu)(uint32_t state)
{
    switch (state) {
        case VFIO_USER_DEVICE_STATE_STOP:
            return VFU_MIGR_STATE_STOP;
        case VFIO_USER_DEVICE_STATE_RUNNING:
            return VFU_MIGR_STATE_RUNNING;
        case VFIO_USER_DEVICE_STATE_STOP_COPY:
            return VFU_MIGR_STATE_STOP_AND_COPY;
        case VFIO_USER_DEVICE_STATE_RESUMING:
            return VFU_MIGR_STATE_RESUME;
        case VFIO_USER_DEVICE_STATE_PRE_COPY:
            return VFU_MIGR_STATE_PRE_COPY;
        default:
            return -1;
    }
}

/**
 * Returns 0 on success, -1 on error setting errno.
 */
int
MOCK_DEFINE(state_trans_notify)(vfu_ctx_t *vfu_ctx,
                                int (*fn)(vfu_ctx_t *, vfu_migr_state_t),
                                uint32_t vfio_device_state)
{
    /*
     * We've already checked that device_state is valid by calling
     * vfio_migr_state_transition_is_valid.
     */
    return fn(vfu_ctx, migr_state_vfio_to_vfu(vfio_device_state));
}

/**
 * Returns 0 on success, -1 on failure setting errno.
 */
ssize_t
MOCK_DEFINE(migr_trans_to_valid_state)(vfu_ctx_t *vfu_ctx,
                                       struct migration *migr,
                                       uint32_t device_state, bool notify)
{
    if (notify) {
        int ret;

        assert(!vfu_ctx->in_cb);
        vfu_ctx->in_cb = CB_MIGR_STATE;
        ret = state_trans_notify(vfu_ctx, migr->callbacks.transition,
                                 device_state);
        vfu_ctx->in_cb = CB_NONE;

        if (ret != 0) {
            return ret;
        }
    }
    migr_state_transition(migr, device_state);
    return 0;
}
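/*
 * Usage sketch (caller-side, illustrative only; the my_* callbacks are
 * hypothetical): init_migration() above accepts a callback table only when
 * `version` matches VFU_MIGR_CALLBACKS_VERS and `transition`, `read_data`
 * and `write_data` are all non-NULL, so a minimal registration looks like:
 *
 *     vfu_migration_callbacks_t cbs = {
 *         .version    = VFU_MIGR_CALLBACKS_VERS,
 *         .transition = my_transition,
 *         .read_data  = my_read_data,
 *         .write_data = my_write_data,
 *     };
 *
 * Anything less than that (or a stale `version`) is rejected with EINVAL
 * before any migration state is tracked.
 */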
/**
 * Returns 0 on success, -1 on failure setting errno.
 */
ssize_t
MOCK_DEFINE(handle_device_state)(vfu_ctx_t *vfu_ctx, struct migration *migr,
                                 uint32_t device_state, bool notify)
{
    assert(vfu_ctx != NULL);
    assert(migr != NULL);

    if (!vfio_migr_state_transition_is_valid(migr->state, device_state)) {
        return ERROR_INT(EINVAL);
    }
    return migr_trans_to_valid_state(vfu_ctx, migr, device_state, notify);
}

size_t
migration_get_state(vfu_ctx_t *vfu_ctx)
{
    return vfu_ctx->migration->state;
}

ssize_t
migration_set_state(vfu_ctx_t *vfu_ctx, uint32_t device_state)
{
    struct migration *migr = vfu_ctx->migration;
    uint32_t state;
    ssize_t ret = 0;

    /* Valid states are 0 .. VFIO_USER_DEVICE_NUM_STATES - 1. */
    if (device_state >= VFIO_USER_DEVICE_NUM_STATES) {
        return ERROR_INT(EINVAL);
    }

    while (migr->state != device_state && ret == 0) {
        state = next_state[migr->state][device_state];

        if (state == VFIO_USER_DEVICE_STATE_ERROR) {
            return ERROR_INT(EINVAL);
        }

        ret = handle_device_state(vfu_ctx, migr, state, true);
    }

    return ret;
}

ssize_t
handle_mig_data_read(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg)
{
    assert(vfu_ctx != NULL);
    assert(msg != NULL);

    if (msg->in.iov.iov_len < sizeof(struct vfio_user_mig_data)) {
        vfu_log(vfu_ctx, LOG_ERR, "message too short (%ld)",
                msg->in.iov.iov_len);
        return ERROR_INT(EINVAL);
    }

    struct migration *migr = vfu_ctx->migration;
    struct vfio_user_mig_data *req = msg->in.iov.iov_base;

    if (vfu_ctx->migration == NULL) {
        vfu_log(vfu_ctx, LOG_ERR, "migration not enabled");
        return ERROR_INT(EINVAL);
    }

    if (migr->state != VFIO_USER_DEVICE_STATE_PRE_COPY &&
        migr->state != VFIO_USER_DEVICE_STATE_STOP_COPY) {
        vfu_log(vfu_ctx, LOG_ERR, "bad migration state to read data: %d",
                migr->state);
        return ERROR_INT(EINVAL);
    }

    if (req->size > vfu_ctx->client_max_data_xfer_size) {
        vfu_log(vfu_ctx, LOG_ERR, "transfer size exceeds limit (%d > %ld)",
                req->size, vfu_ctx->client_max_data_xfer_size);
        return ERROR_INT(EINVAL);
    }

    if (req->argsz < sizeof(struct vfio_user_mig_data) + req->size) {
        vfu_log(vfu_ctx, LOG_ERR, "argsz too small (%d < %ld)",
                req->argsz, sizeof(struct vfio_user_mig_data) + req->size);
        return ERROR_INT(EINVAL);
    }

    msg->out.iov.iov_len = msg->in.iov.iov_len + req->size;
    msg->out.iov.iov_base = calloc(1, msg->out.iov.iov_len);

    if (msg->out.iov.iov_base == NULL) {
        return ERROR_INT(ENOMEM);
    }

    struct vfio_user_mig_data *res = msg->out.iov.iov_base;

    ssize_t ret = migr->callbacks.read_data(vfu_ctx, &res->data, req->size);

    if (ret < 0) {
        vfu_log(vfu_ctx, LOG_ERR, "read_data callback failed, errno=%d",
                errno);
        iov_free(&msg->out.iov);
        return ret;
    }

    res->size = ret;
    res->argsz = sizeof(struct vfio_user_mig_data) + ret;

    return 0;
}
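/*
 * Worked example for the read path above (illustrative values): a client
 * request carrying size = 4096 and argsz = sizeof(struct vfio_user_mig_data)
 * + 4096 passes the checks provided 4096 does not exceed
 * client_max_data_xfer_size and the device is in PRE_COPY or STOP_COPY. The
 * read_data callback is then asked for at most 4096 bytes, and the reply's
 * `size` and `argsz` are derived from however many bytes the callback
 * actually returned, which may be fewer than requested.
 */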
ssize_t
handle_mig_data_write(vfu_ctx_t *vfu_ctx, vfu_msg_t *msg)
{
    assert(vfu_ctx != NULL);
    assert(msg != NULL);

    if (msg->in.iov.iov_len < sizeof(struct vfio_user_mig_data)) {
        vfu_log(vfu_ctx, LOG_ERR, "message too short (%ld)",
                msg->in.iov.iov_len);
        return ERROR_INT(EINVAL);
    }

    struct migration *migr = vfu_ctx->migration;
    struct vfio_user_mig_data *req = msg->in.iov.iov_base;

    if (vfu_ctx->migration == NULL) {
        vfu_log(vfu_ctx, LOG_ERR, "migration not enabled");
        return ERROR_INT(EINVAL);
    }

    if (migr->state != VFIO_USER_DEVICE_STATE_RESUMING) {
        vfu_log(vfu_ctx, LOG_ERR, "bad migration state to write data: %d",
                migr->state);
        return ERROR_INT(EINVAL);
    }

    if (req->size > vfu_ctx->client_max_data_xfer_size) {
        vfu_log(vfu_ctx, LOG_ERR, "transfer size exceeds limit (%d > %ld)",
                req->size, vfu_ctx->client_max_data_xfer_size);
        return ERROR_INT(EINVAL);
    }

    if (req->argsz < sizeof(struct vfio_user_mig_data) + req->size) {
        vfu_log(vfu_ctx, LOG_ERR, "argsz too small (%d < %ld)",
                req->argsz, sizeof(struct vfio_user_mig_data) + req->size);
        return ERROR_INT(EINVAL);
    }

    if (msg->in.iov.iov_len < sizeof(struct vfio_user_mig_data) + req->size) {
        vfu_log(vfu_ctx, LOG_ERR, "short write (%ld < %ld)",
                msg->in.iov.iov_len,
                sizeof(struct vfio_user_mig_data) + req->size);
        return ERROR_INT(EINVAL);
    }

    ssize_t ret = migr->callbacks.write_data(vfu_ctx, &req->data, req->size);

    if (ret < 0) {
        vfu_log(vfu_ctx, LOG_ERR, "write_data callback failed, errno=%d",
                errno);
        return ret;
    } else if (ret != req->size) {
        vfu_log(vfu_ctx, LOG_ERR, "migration data partial write of size=%ld",
                ret);
        return ERROR_INT(EINVAL);
    }

    return 0;
}

bool
MOCK_DEFINE(device_is_stopped_and_copying)(struct migration *migr)
{
    return migr != NULL && migr->state == VFIO_USER_DEVICE_STATE_STOP_COPY;
}

bool
MOCK_DEFINE(device_is_stopped)(struct migration *migr)
{
    return migr != NULL && migr->state == VFIO_USER_DEVICE_STATE_STOP;
}

size_t
migration_get_pgsize(struct migration *migr)
{
    assert(migr != NULL);

    return migr->pgsize;
}

int
migration_set_pgsize(struct migration *migr, size_t pgsize)
{
    assert(migr != NULL);

    // FIXME?
    if (pgsize != PAGE_SIZE) {
        return ERROR_INT(EINVAL);
    }

    migr->pgsize = pgsize;
    return 0;
}

bool
migration_feature_needs_quiesce(struct vfio_user_device_feature *feature)
{
    /*
     * A SET of the migration device state feature changes device state, so it
     * requires a quiesced device; a PROBE never does.
     */
    return ((feature->flags &
             (VFIO_DEVICE_FEATURE_SET | VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE)) != 0)
           && !(feature->flags & VFIO_DEVICE_FEATURE_PROBE);
}

/* ex: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */