diff options
-rw-r--r-- | lib/libmuser.c | 304 | ||||
-rw-r--r-- | lib/libmuser_pci.c | 1 | ||||
-rw-r--r-- | lib/muser.h | 59 | ||||
-rw-r--r-- | samples/client.c | 228 | ||||
-rw-r--r-- | samples/server.c | 76 |
5 files changed, 629 insertions, 39 deletions
diff --git a/lib/libmuser.c b/lib/libmuser.c index 2d0805f..23c1f6a 100644 --- a/lib/libmuser.c +++ b/lib/libmuser.c @@ -87,6 +87,13 @@ typedef struct { int efds[0]; /* XXX must be last */ } lm_irqs_t; +enum migration_iteration_state { + VFIO_USER_MIGRATION_ITERATION_STATE_INITIAL, + VFIO_USER_MIGRATION_ITERATION_STATE_STARTED, + VFIO_USER_MIGRATION_ITERATION_STATE_DATA_PREPARED, + VFIO_USER_MIGRATION_ITERATION_STATE_FINISHED +}; + struct lm_ctx { void *pvt; dma_controller_t *dma; @@ -112,7 +119,16 @@ struct lm_ctx { int client_max_fds; - size_t migration_pgsize; + struct { + struct vfio_device_migration_info info; + size_t pgsize; + lm_migration_callbacks_t callbacks; + struct { + enum migration_iteration_state state; + __u64 offset; + __u64 size; + } iter; + } migration; lm_irqs_t irqs; /* XXX must be last */ }; @@ -507,7 +523,7 @@ set_version(lm_ctx_t *lm_ctx, int sock) } ret = recv_version(sock, &client_mj, &client_mn, &msg_id, true, - &lm_ctx->client_max_fds, &lm_ctx->migration_pgsize); + &lm_ctx->client_max_fds, &lm_ctx->migration.pgsize); if (ret < 0) { lm_log(lm_ctx, LM_DBG, "failed to receive version: %s", strerror(-ret)); goto out; @@ -520,7 +536,7 @@ set_version(lm_ctx_t *lm_ctx, int sock) ret = -EINVAL; goto out; } - if (lm_ctx->migration_pgsize == 0) { + if (lm_ctx->migration.pgsize == 0) { lm_log(lm_ctx, LM_ERR, "bad migration page size"); ret = -EINVAL; goto out; @@ -528,7 +544,7 @@ set_version(lm_ctx_t *lm_ctx, int sock) /* FIXME need to check max_fds */ - lm_ctx->migration_pgsize = MIN(lm_ctx->migration_pgsize, + lm_ctx->migration.pgsize = MIN(lm_ctx->migration.pgsize, sysconf(_SC_PAGESIZE)); out: free(server_caps); @@ -1541,6 +1557,259 @@ handle_pci_config_space_access(lm_ctx_t *lm_ctx, char *buf, size_t count, return count; } +/* valid migration state transitions */ +__u32 migration_states[VFIO_DEVICE_STATE_MASK] = { + [VFIO_DEVICE_STATE_STOP] = 1 << VFIO_DEVICE_STATE_STOP, + [VFIO_DEVICE_STATE_RUNNING] = /* running */ + (1 << 
VFIO_DEVICE_STATE_STOP) | + (1 << VFIO_DEVICE_STATE_RUNNING) | + (1 << VFIO_DEVICE_STATE_SAVING) | + (1 << (VFIO_DEVICE_STATE_RUNNING | VFIO_DEVICE_STATE_SAVING)) | + (1 << VFIO_DEVICE_STATE_RESUMING), + [VFIO_DEVICE_STATE_SAVING] = /* stop-and-copy */ + (1 << VFIO_DEVICE_STATE_STOP) | + (1 << VFIO_DEVICE_STATE_SAVING), + [VFIO_DEVICE_STATE_RUNNING | VFIO_DEVICE_STATE_SAVING] = /* pre-copy */ + (1 << VFIO_DEVICE_STATE_SAVING) | + (1 << VFIO_DEVICE_STATE_RUNNING | VFIO_DEVICE_STATE_SAVING), + [VFIO_DEVICE_STATE_RESUMING] = /* resuming */ + (1 << VFIO_DEVICE_STATE_RUNNING) | + (1 << VFIO_DEVICE_STATE_RESUMING) +}; + +static bool +_migration_state_transition_is_valid(__u32 from, __u32 to) +{ + return migration_states[from] & (1 << to); +} + +static ssize_t +handle_migration_device_state(lm_ctx_t *lm_ctx, __u32 *device_state, + bool is_write) { + + int ret; + + assert(lm_ctx != NULL); + assert(device_state != NULL); + + if (!is_write) { + *device_state = lm_ctx->migration.info.device_state; + return 0; + } + + if (*device_state & ~VFIO_DEVICE_STATE_MASK) { + return -EINVAL; + } + + if (!_migration_state_transition_is_valid(lm_ctx->migration.info.device_state, + *device_state)) { + return -EINVAL; + } + + switch (*device_state) { + case VFIO_DEVICE_STATE_STOP: + ret = lm_ctx->migration.callbacks.transition(lm_ctx->pvt, + LM_MIGR_STATE_STOP); + break; + case VFIO_DEVICE_STATE_RUNNING: + ret = lm_ctx->migration.callbacks.transition(lm_ctx->pvt, + LM_MIGR_STATE_START); + break; + case VFIO_DEVICE_STATE_SAVING: + /* + * FIXME How should the device operate during the stop-and-copy + * phase? Should we only allow the migration data to be read from + * the migration region? E.g. Access to any other region should be + * failed? This might be a good question to send to LKML. 
+ */ + ret = lm_ctx->migration.callbacks.transition(lm_ctx->pvt, + LM_MIGR_STATE_STOP_AND_COPY); + break; + case VFIO_DEVICE_STATE_RUNNING | VFIO_DEVICE_STATE_SAVING: + ret = lm_ctx->migration.callbacks.transition(lm_ctx->pvt, + LM_MIGR_STATE_PRE_COPY); + break; + case VFIO_DEVICE_STATE_RESUMING: + ret = lm_ctx->migration.callbacks.transition(lm_ctx->pvt, + LM_MIGR_STATE_RESUME); + break; + default: + ret = -EINVAL; + } + + if (ret == 0) { + lm_ctx->migration.info.device_state = *device_state; + } + + return ret; +} + +static ssize_t +handle_migration_pending_bytes(lm_ctx_t *lm_ctx, __u64 *pending_bytes, + bool is_write) +{ + assert(lm_ctx != NULL); + assert(pending_bytes != NULL); + + if (is_write) { + return -EINVAL; + } + + if (lm_ctx->migration.iter.state == VFIO_USER_MIGRATION_ITERATION_STATE_FINISHED) { + *pending_bytes = 0; + return 0; + } + + *pending_bytes = lm_ctx->migration.callbacks.get_pending_bytes(lm_ctx->pvt); + + switch (lm_ctx->migration.iter.state) { + case VFIO_USER_MIGRATION_ITERATION_STATE_INITIAL: + case VFIO_USER_MIGRATION_ITERATION_STATE_DATA_PREPARED: + /* + * FIXME what happens if data haven't been consumed in the previous + * iteration? Ask on LKML. + */ + if (*pending_bytes == 0) { + lm_ctx->migration.iter.state = VFIO_USER_MIGRATION_ITERATION_STATE_FINISHED; + } else { + lm_ctx->migration.iter.state = VFIO_USER_MIGRATION_ITERATION_STATE_STARTED; + } + break; + case VFIO_USER_MIGRATION_ITERATION_STATE_STARTED: + /* + * Repeated reads of pending_bytes should not have any side effects. + * FIXME does it have to be the same as the previous value? Can it + * increase or even decrease? I suppose it can't be lower than + * data_size? Ask on LKML. 
+ */ + break; + default: + return -EINVAL; + } + return 0; +} + +static ssize_t +handle_migration_data_offset(lm_ctx_t *lm_ctx, __u64 *offset, bool is_write) +{ + int ret; + + assert(lm_ctx != NULL); + assert(offset != NULL); + + if (is_write) { + return -EINVAL; + } + + switch (lm_ctx->migration.iter.state) { + case VFIO_USER_MIGRATION_ITERATION_STATE_STARTED: + break; + default: + /* + * FIXME it's not clear whether these registers can be accessed in + * other parts of the iteration, need clarification on the + * following: + * + * Read on data_offset and data_size should return the offset and + * size of the current buffer if the user application reads + * data_offset and data_size more than once here. + */ + return -EINVAL; + } + + ret = lm_ctx->migration.callbacks.prepare_data(lm_ctx->pvt, + &lm_ctx->migration.iter.offset, + &lm_ctx->migration.iter.size); + if (ret < 0) { + return ret; + } + + *offset = lm_ctx->migration.iter.offset + sizeof(struct vfio_device_migration_info); + + return ret; +} + +static ssize_t +handle_migration_data_size(lm_ctx_t *lm_ctx, __u64 *size, bool is_write) +{ + assert(lm_ctx != NULL); + assert(size != NULL); + + if (is_write) { + return -EINVAL; + } + + switch (lm_ctx->migration.iter.state) { + case VFIO_USER_MIGRATION_ITERATION_STATE_STARTED: + break; + default: + /* FIXME see comment in handle_migration_data_offset */ + return -EINVAL; + } + + *size = lm_ctx->migration.iter.size; + + return 0; +} + +static ssize_t +handle_migration_region_access(lm_ctx_t *lm_ctx, char *buf, size_t count, + loff_t pos, bool is_write) +{ + int ret; + + assert(lm_ctx != NULL); + assert(buf != NULL); + + if (pos + count > lm_ctx->pci_info.reg_info[LM_DEV_MIGRATION_REG_IDX].size) { + lm_log(lm_ctx, LM_ERR, "read %#x-%#x past end of migration region", + pos, pos + count - 1); + return -EINVAL; + } + switch (pos) { + case offsetof(struct vfio_device_migration_info, device_state): + if (count != sizeof(lm_ctx->migration.info.device_state)) { + return 
-EINVAL; + } + ret = handle_migration_device_state(lm_ctx, (__u32*)buf, + is_write); + break; + case offsetof(struct vfio_device_migration_info, pending_bytes): + if (count != sizeof(lm_ctx->migration.info.pending_bytes)) { + return -EINVAL; + } + ret = handle_migration_pending_bytes(lm_ctx, (__u64*)buf, is_write); + break; + case offsetof(struct vfio_device_migration_info, data_offset): + if (count != sizeof(lm_ctx->migration.info.data_offset)) { + return -EINVAL; + } + ret = handle_migration_data_offset(lm_ctx, (__u64*)buf, is_write); + break; + case offsetof(struct vfio_device_migration_info, data_size): + if (count != sizeof(lm_ctx->migration.info.data_size)) { + return -EINVAL; + } + ret = handle_migration_data_size(lm_ctx, (__u64*)buf, is_write); + break; + default: + if (is_write) { + /* FIXME how do we handle the offset? */ + ret = lm_ctx->migration.callbacks.write_data(lm_ctx->pvt, + buf, count); + } else { + ret = lm_ctx->migration.callbacks.read_data(lm_ctx->pvt, + buf, count, + pos - sizeof(struct vfio_device_migration_info)); + } + } + + if (ret == 0) { + ret = count; + } + return ret; +} + static ssize_t do_access(lm_ctx_t *lm_ctx, char *buf, size_t count, loff_t pos, bool is_write) { @@ -1569,6 +1838,11 @@ do_access(lm_ctx_t *lm_ctx, char *buf, size_t count, loff_t pos, bool is_write) is_write); } + if (idx == LM_DEV_MIGRATION_REG_IDX) { + return handle_migration_region_access(lm_ctx, buf, count, offset, + is_write); + } + /* * Checking whether a callback exists might sound expensive however this * code is not performance critical. 
This works well when we don't expect a @@ -2098,7 +2372,7 @@ handle_dirty_pages_get(lm_ctx_t *lm_ctx, struct vfio_user_header *hdr, goto out; } *nr_iovecs = 1 + size / sizeof(struct vfio_iommu_type1_dirty_bitmap_get); - *iovecs = malloc(*nr_iovecs); + *iovecs = malloc(*nr_iovecs * sizeof(struct iovec)); if (*iovecs == NULL) { ret = -errno; goto out; @@ -2154,7 +2428,7 @@ handle_dirty_pages(lm_ctx_t *lm_ctx, struct vfio_user_header *hdr, if (dirty_bitmap.flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_START) { ret = dma_controller_dirty_page_logging_start(lm_ctx->dma, - lm_ctx->migration_pgsize); + lm_ctx->migration.pgsize); } else if (dirty_bitmap.flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP) { ret = dma_controller_dirty_page_logging_stop(lm_ctx->dma); } else if (dirty_bitmap.flags & VFIO_IOMMU_DIRTY_PAGES_FLAG_GET_BITMAP) { @@ -2188,9 +2462,15 @@ process_request(lm_ctx_t *lm_ctx) assert(lm_ctx != NULL); + if (lm_ctx->pci_info.reg_info[LM_DEV_CFG_REG_IDX].size > 0 && + lm_ctx->migration.info.device_state == VFIO_DEVICE_STATE_STOP) { + return -ESHUTDOWN; + } + nr_fds = lm_ctx->client_max_fds; fds = alloca(nr_fds * sizeof(int)); + /* FIXME get request shouldn't set errno, it should return it as -errno */ ret = transports_ops[lm_ctx->trans].get_request(lm_ctx, &hdr, fds, &nr_fds); if (unlikely(ret < 0)) { if (ret == -EAGAIN || ret == -EWOULDBLOCK) { @@ -2581,6 +2861,18 @@ pci_config_setup(lm_ctx_t *lm_ctx, const lm_dev_info_t *dev_info) if (migr_reg->size < sizeof(struct vfio_device_migration_info)) { return -EINVAL; } + + /* FIXME this should be done in lm_ctx_run or poll */ + lm_ctx->migration.info.device_state = VFIO_DEVICE_STATE_RUNNING; + + lm_ctx->migration.callbacks = dev_info->migration_callbacks; + if (lm_ctx->migration.callbacks.transition == NULL || + lm_ctx->migration.callbacks.get_pending_bytes == NULL || + lm_ctx->migration.callbacks.prepare_data == NULL || + lm_ctx->migration.callbacks.read_data == NULL || + lm_ctx->migration.callbacks.write_data == NULL) { + return 
-EINVAL; + } } return 0; diff --git a/lib/libmuser_pci.c b/lib/libmuser_pci.c index 78d65fa..711acc4 100644 --- a/lib/libmuser_pci.c +++ b/lib/libmuser_pci.c @@ -314,6 +314,7 @@ muser_is_pci_hdr_access(loff_t pos) return pos - off >= 0 && pos - off < PCI_STD_HEADER_SIZEOF; } +/* FIXME this function is misleading, remove it */ int muser_pci_hdr_access(lm_ctx_t *lm_ctx, size_t *count, loff_t *pos, bool is_write, diff --git a/lib/muser.h b/lib/muser.h index 375be0e..949680c 100644 --- a/lib/muser.h +++ b/lib/muser.h @@ -139,6 +139,8 @@ typedef struct { /* * Callback function that is called when the region is read or written. + * Note that the memory of the region is owned by the user, except for the + * standard header (first 64 bytes) of the PCI configuration space. */ lm_region_access_t *fn; @@ -260,6 +262,62 @@ typedef enum { #define LM_MAX_CAPS (PCI_CFG_SPACE_SIZE - PCI_STD_HEADER_SIZEOF) / PCI_CAP_SIZEOF +/* + * FIXME the names of migration callback functions are probably far too long, + * but for now it helps with the implementation. + */ +typedef int (lm_migration_callback_t)(void *pvt); + +typedef enum { + LM_MIGR_STATE_STOP, + LM_MIGR_STATE_START, + LM_MIGR_STATE_STOP_AND_COPY, + LM_MIGR_STATE_PRE_COPY, + LM_MIGR_STATE_RESUME +} lm_migr_state_t; + +typedef struct { + + /* migration state transition callback */ + /* TODO rename to lm_migration_state_transition_callback */ + /* FIXME maybe we should create a single callback and pass the state? */ + int (*transition)(void *pvt, lm_migr_state_t state); + + /* Callbacks for saving device state */ + + /* + * Function that is called to retrieve pending migration data. If migration + * data were previously made available (function prepare_data has been + * called) then calling this function signifies that they have been read + * (e.g. migration data can be discarded). If the function returns 0 then + * migration has finished and this function won't be called again. 
+ */ + __u64 (*get_pending_bytes)(void *pvt); + + /* + * Function that is called to instruct the device to prepare migration data. + * The function must return only after migration data are available at the + * specified offset. + */ + int (*prepare_data)(void *pvt, __u64 *offset, __u64 *size); + + /* + * Function that is called to read migration data. offset and size can + * be any subrange on the offset and size previously returned by + * prepare_data. The function must return the amount of data read. This + * function can be called even if the migration data can be memory mapped. + * + * Does this mean that reading data_offset/data_size updates the values? + */ + size_t (*read_data)(void *pvt, void *buf, __u64 count, __u64 offset); + + /* Callback for restoring device state */ + + /* Function that is called for writing previously stored device state. */ + size_t (*write_data)(void *pvt, void *data, __u64 size); + +} lm_migration_callbacks_t; + /** * Device information structure, used to create the lm_ctx. 
* To be filled and passed to lm_ctx_create() @@ -326,6 +384,7 @@ typedef struct { int nr_caps; lm_cap_t **caps; + lm_migration_callbacks_t migration_callbacks; } lm_dev_info_t; diff --git a/samples/client.c b/samples/client.c index 5ff79cd..0b42267 100644 --- a/samples/client.c +++ b/samples/client.c @@ -328,51 +328,93 @@ configure_irqs(int sock) } static int -access_bar0(int sock) +access_region(int sock, int region, bool is_write, uint64_t offset, + void *data, size_t data_len) { - struct { - struct vfio_user_region_access region_access; - time_t t; - } __attribute__((packed)) data = { - .region_access = { - .region = LM_DEV_BAR0_REG_IDX, - .count = sizeof(data.t) + struct vfio_user_region_access send_region_access = { + .offset = offset, + .region = region, + .count = data_len + }; + struct iovec send_iovecs[3] = { + [1] = { + .iov_base = &send_region_access, + .iov_len = sizeof send_region_access }, - .t = time(NULL) + [2] = { + .iov_base = data, + .iov_len = data_len + } }; - uint16_t msg_id = 1; - const int sleep_time = 1; - struct vfio_user_region_access region_access = {}; - int ret = send_recv_vfio_user_msg(sock, msg_id, VFIO_USER_REGION_WRITE, - &data, sizeof data, NULL, 0, NULL, - &region_access, sizeof region_access); - if (ret < 0) { - fprintf(stderr, "failed to write to BAR0: %s\n", strerror(-ret)); + struct { + struct vfio_user_region_access region_access; + char data[data_len]; + } __attribute__((packed)) recv_data; + int op, ret; + size_t nr_send_iovecs, recv_data_len; + + if (is_write) { + op = VFIO_USER_REGION_WRITE; + nr_send_iovecs = 3; + recv_data_len = sizeof(recv_data.region_access); + } else { + op = VFIO_USER_REGION_READ; + nr_send_iovecs = 2; + recv_data_len = sizeof(recv_data); + } + + ret = _send_recv_vfio_user_msg(sock, 0, op, + send_iovecs, nr_send_iovecs, + NULL, 0, NULL, + &recv_data, recv_data_len); + if (ret != 0) { + fprintf(stderr, "failed to %s region %d %#lx-%#lx: %s\n", + is_write ? 
"write to" : "read from", region, offset, + offset + data_len - 1, strerror(-ret)); return ret; } - if (region_access.count != sizeof data.t) { - fprintf(stderr, "bad written data length %d\n", region_access.count); + if (recv_data.region_access.count != data_len) { + fprintf(stderr, "bad %s data count, expected=%d, actual=%d\n", + is_write ? "write" : "read", data_len, + recv_data.region_access.count); return -EINVAL; } - printf("wrote to BAR0: %ld\n", data.t); + /* + * TODO we could avoid the memcpy if _sed_recv_vfio_user_msg received the + * response into an iovec, but it's some work to implement it. + */ + if (!is_write) { + memcpy(data, recv_data.data, data_len); + } + return 0; +} - msg_id++; +static int +access_bar0(int sock) +{ + time_t t = time(NULL); + const int sleep_time = 1; + int ret = access_region(sock, LM_DEV_BAR0_REG_IDX, true, 0, &t, sizeof t); + + if (ret < 0) { + fprintf(stderr, "failed to write to BAR0: %s\n", strerror(-ret)); + return ret; + } + + printf("wrote to BAR0: %ld\n", t); sleep(sleep_time); - ret = send_recv_vfio_user_msg(sock, msg_id, VFIO_USER_REGION_READ, - &data.region_access, sizeof data.region_access, - NULL, 0, NULL, &data, sizeof data); + ret = access_region(sock, LM_DEV_BAR0_REG_IDX, false, 0, &t, sizeof t); if (ret < 0) { fprintf(stderr, "failed to read from BAR0: %s\n", strerror(-ret)); return ret; } - assert(data.region_access.count == sizeof data.t); - printf("read from BAR0: %ld\n", data.t); + printf("read from BAR0: %ld\n", t); - assert(data.t >= sleep_time); + assert(t >= sleep_time); return 0; } @@ -550,6 +592,105 @@ get_dirty_bitmaps(int sock, struct vfio_user_dma_region *dma_regions, return 0; } +enum migration { + NO_MIGRATION, + MIGRATION_SOURCE, + MIGRATION_DESTINATION, +}; + +static void +usage(char *path) { + fprintf(stderr, "Usage: %s [-h] [-m src|dst] /path/to/socket\n", + basename(path)); +} + +static int +migrate_from(int sock) +{ + __u32 device_state = VFIO_DEVICE_STATE_SAVING; + __u64 pending_bytes, 
data_offset, data_size; + void *data; + + /* XXX set device state to stop-and-copy */ + int ret = access_region(sock, LM_DEV_MIGRATION_REG_IDX, true, + offsetof(struct vfio_device_migration_info, device_state), + &device_state, sizeof(device_state)); + if (ret < 0) { + fprintf(stderr, "failed to write to device state: %s\n", + strerror(-ret)); + return ret; + } + + /* XXX read pending_bytes */ + ret = access_region(sock, LM_DEV_MIGRATION_REG_IDX, false, + offsetof(struct vfio_device_migration_info, pending_bytes), + &pending_bytes, sizeof pending_bytes); + if (ret < 0) { + fprintf(stderr, "failed to read pending_bytes: %s\n", strerror(-ret)); + return ret; + } + + while (pending_bytes > 0) { + + /* XXX read data_offset and data_size */ + ret = access_region(sock, LM_DEV_MIGRATION_REG_IDX, false, + offsetof(struct vfio_device_migration_info, data_offset), + &data_offset, sizeof data_offset); + if (ret < 0) { + fprintf(stderr, "failed to read data_offset: %s\n", strerror(-ret)); + return ret; + } + + ret = access_region(sock, LM_DEV_MIGRATION_REG_IDX, false, + offsetof(struct vfio_device_migration_info, data_size), + &data_size, sizeof data_size); + if (ret < 0) { + fprintf(stderr, "failed to read data_size: %s\n", strerror(-ret)); + return ret; + } + + /* XXX read migration data */ + data = malloc(data_size); + if (data == NULL) { + return -errno; + } + ret = access_region(sock, LM_DEV_MIGRATION_REG_IDX, false, data_offset, + data, data_size); + if (ret < 0) { + fprintf(stderr, "failed to read migration data: %s\n", + strerror(-ret)); + } + + /* FIXME send migration data to the destination client process */ + printf("XXX migration: %#x bytes worth of data\n", data_size); + + /* + * XXX read pending_bytes again to indicate to the server that the + * migration data have been consumed. 
+ */ + ret = access_region(sock, LM_DEV_MIGRATION_REG_IDX, false, + offsetof(struct vfio_device_migration_info, pending_bytes), + &pending_bytes, sizeof pending_bytes); + if (ret < 0) { + fprintf(stderr, "failed to read pending_bytes: %s\n", strerror(-ret)); + return ret; + } + } + + /* XXX read device state, migration must have finished now */ + device_state = VFIO_DEVICE_STATE_STOP; + ret = access_region(sock, LM_DEV_MIGRATION_REG_IDX, true, + offsetof(struct vfio_device_migration_info, device_state), + &device_state, sizeof(device_state)); + if (ret < 0) { + fprintf(stderr, "failed to write to device state: %s\n", + strerror(-ret)); + return ret; + } + + return 0; +} + int main(int argc, char *argv[]) { int ret, sock; @@ -565,13 +706,36 @@ int main(int argc, char *argv[]) size_t pgsize; int nr_dma_regions; struct vfio_iommu_type1_dirty_bitmap dirty_bitmap = {0}; + int opt; + enum migration migration = NO_MIGRATION; + + while ((opt = getopt(argc, argv, "hm:")) != -1) { + switch (opt) { + case 'h': + usage(argv[0]); + exit(EXIT_SUCCESS); + case 'm': + if (strcmp(optarg, "src") == 0) { + migration = MIGRATION_SOURCE; + } else if (strcmp(optarg, "dst") == 0) { + migration = MIGRATION_DESTINATION; + } else { + fprintf(stderr, "invalid migration argument %s\n", optarg); + exit(EXIT_FAILURE); + } + break; + default: + usage(argv[0]); + exit(EXIT_FAILURE); + } + } - if (argc != 2) { - fprintf(stderr, "usage: %s /path/to/socket\n", argv[0]); + if (argc != optind + 1) { + usage(argv[0]); exit(EXIT_FAILURE); } - if ((sock = init_sock(argv[1])) < 0) { + if ((sock = init_sock(argv[optind])) < 0) { return sock; } @@ -727,6 +891,10 @@ int main(int argc, char *argv[]) return ret; } + if (migration == MIGRATION_SOURCE) { + ret = migrate_from(sock); + } + return 0; } diff --git a/samples/server.c b/samples/server.c index 9f4d3b1..4611fb0 100644 --- a/samples/server.c +++ b/samples/server.c @@ -40,6 +40,7 @@ #include <assert.h> #include <openssl/md5.h> #include <sys/mman.h> 
+#include <sys/param.h> #include "../lib/muser.h" @@ -54,6 +55,11 @@ struct server_data { time_t bar0; uint8_t *bar1; struct dma_regions regions[NR_DMA_REGIONS]; + struct { + int fake_internal_state; + __u64 pending_bytes; + __u64 data_size; + } migration; }; static void @@ -205,8 +211,65 @@ static int device_reset(void *pvt) printf("device reset callback\n"); } -int main(int argc, char *argv[]) +static int +migration_device_state_transition(void *pvt, lm_migr_state_t state) { + struct server_data *server_data = pvt; + + printf("migration: transition to device state %d\n", state); + + switch (state) { + case LM_MIGR_STATE_STOP_AND_COPY: + /* TODO must be less than size of data region in migration region */ + server_data->migration.pending_bytes = sysconf(_SC_PAGESIZE); + break; + case LM_MIGR_STATE_STOP: + assert(server_data->migration.pending_bytes == 0); + break; + default: + assert(false); /* FIXME */ + } + return 0; +} + +static __u64 +migration_get_pending_bytes(void *pvt) +{ + struct server_data *server_data = pvt; + if (server_data->migration.data_size > 0) { + assert(server_data->migration.data_size <= server_data->migration.pending_bytes); + server_data->migration.pending_bytes -= server_data->migration.data_size; + } + return server_data->migration.pending_bytes; +} + +static int +migration_prepare_data(void *pvt, __u64 *offset, __u64 *size) +{ + struct server_data *server_data = pvt; + + *offset = 0; + *size = server_data->migration.data_size = MIN(server_data->migration.pending_bytes, sysconf(_SC_PAGESIZE) / 4); + return 0; +} + +static size_t +migration_read_data(void *pvt, void *buf, __u64 size, __u64 offset) +{ + struct server_data *server_data = pvt; + + assert(server_data->migration.data_size >= size); + + return 0; +} + +static size_t +migration_write_data(void *pvt, void *data, __u64 size) +{ + assert(false); +} + +int main(int argc, char *argv[]){ int ret; bool trans_sock = false, verbose = false; char opt; @@ -269,7 +332,7 @@ int main(int argc, 
char *argv[]) }, .reg_info[LM_DEV_MIGRATION_REG_IDX] = { /* migration region */ .flags = LM_REG_FLAG_RW, - .size = sysconf(_SC_PAGESIZE), + .size = sizeof(struct vfio_device_migration_info) + sysconf(_SC_PAGESIZE), .mmap_areas = sparse_areas, }, .irq_count[LM_DEV_INTX_IRQ_IDX] = 1, @@ -278,7 +341,14 @@ int main(int argc, char *argv[]) .reset = device_reset, .map_dma = map_dma, .unmap_dma = unmap_dma, - .pvt = &server_data + .pvt = &server_data, + .migration_callbacks = { + .transition = &migration_device_state_transition, + .get_pending_bytes = &migration_get_pending_bytes, + .prepare_data = &migration_prepare_data, + .read_data = &migration_read_data, + .write_data = &migration_write_data + } }; sigemptyset(&act.sa_mask); |