diff options
-rw-r--r-- | lib/muser.h | 12 | ||||
-rw-r--r-- | lib/muser_ctx.c | 187 | ||||
-rw-r--r-- | samples/client.c | 280 | ||||
-rw-r--r-- | samples/server.c | 126 |
4 files changed, 476 insertions, 129 deletions
diff --git a/lib/muser.h b/lib/muser.h index 59bc8de..f183e2e 100644 --- a/lib/muser.h +++ b/lib/muser.h @@ -262,7 +262,7 @@ typedef int (lm_migration_callback_t)(void *pvt); typedef enum { LM_MIGR_STATE_STOP, - LM_MIGR_STATE_START, + LM_MIGR_STATE_RUNNING, LM_MIGR_STATE_STOP_AND_COPY, LM_MIGR_STATE_PRE_COPY, LM_MIGR_STATE_RESUME @@ -303,10 +303,16 @@ typedef struct { */ size_t (*read_data)(void *pvt, void *buf, __u64 count, __u64 offset); - /* Callback for restoring device state */ + /* Callbacks for restoring device state */ + + /* + * Function that is called when client has written some previously stored + * device state. + */ + int (*data_written)(void *pvt, __u64 count, __u64 offset); /* Fuction that is called for writing previously stored device state. */ - size_t (*write_data)(void *pvt, void *data, __u64 size); + size_t (*write_data)(void *pvt, void *buf, __u64 count, __u64 offset); } lm_migration_callbacks_t; diff --git a/lib/muser_ctx.c b/lib/muser_ctx.c index 5ea437e..72f6947 100644 --- a/lib/muser_ctx.c +++ b/lib/muser_ctx.c @@ -1230,7 +1230,7 @@ handle_migration_device_state(lm_ctx_t *lm_ctx, __u32 *device_state, break; case VFIO_DEVICE_STATE_RUNNING: ret = lm_ctx->migration.callbacks.transition(lm_ctx->pvt, - LM_MIGR_STATE_START); + LM_MIGR_STATE_RUNNING); break; case VFIO_DEVICE_STATE_SAVING: /* @@ -1317,13 +1317,10 @@ handle_migration_pending_bytes(lm_ctx_t *lm_ctx, __u64 *pending_bytes, */ static ssize_t -handle_migration_data_offset(lm_ctx_t *lm_ctx, __u64 *offset, bool is_write) +handle_migration_data_offset_when_saving(lm_ctx_t *lm_ctx, bool is_write) { int ret; - assert(lm_ctx != NULL); - assert(offset != NULL); - if (is_write) { lm_log(lm_ctx, LM_ERR, "data_offset is RO when saving"); return -EINVAL; @@ -1349,16 +1346,47 @@ handle_migration_data_offset(lm_ctx_t *lm_ctx, __u64 *offset, bool is_write) return -EINVAL; } - *offset = lm_ctx->migration.iter.offset + sizeof(struct vfio_device_migration_info); + return 0; +} + +static ssize_t 
+handle_migration_data_offset(lm_ctx_t *lm_ctx, __u64 *offset, bool is_write) +{ + int ret; + + assert(lm_ctx != NULL); + + switch (lm_ctx->migration.info.device_state) { + case VFIO_DEVICE_STATE_SAVING: + case VFIO_DEVICE_STATE_RUNNING | VFIO_DEVICE_STATE_SAVING: + ret = handle_migration_data_offset_when_saving(lm_ctx, is_write); + break; + case VFIO_DEVICE_STATE_RESUMING: + if (is_write) { + lm_log(lm_ctx, LM_ERR, "bad write to migration data_offset"); + ret = -EINVAL; + } else { + ret = 0; + } + break; + default: + /* TODO improve error message */ + lm_log(lm_ctx, LM_ERR, "bad access to migration data_offset in state %d", + lm_ctx->migration.info.device_state); + ret = -EINVAL; + } + + if (ret == 0 && !is_write) { + *offset = lm_ctx->migration.iter.offset + sizeof(struct vfio_device_migration_info); + } return ret; } static ssize_t -handle_migration_data_size(lm_ctx_t *lm_ctx, __u64 *size, bool is_write) +handle_migration_data_size_when_saving(lm_ctx_t *lm_ctx, bool is_write) { assert(lm_ctx != NULL); - assert(size != NULL); if (is_write) { /* TODO improve error message */ @@ -1366,9 +1394,47 @@ handle_migration_data_size(lm_ctx_t *lm_ctx, __u64 *size, bool is_write) return -EINVAL; } - switch (lm_ctx->migration.iter.state) { - case VFIO_USER_MIGRATION_ITERATION_STATE_STARTED: - case VFIO_USER_MIGRATION_ITERATION_STATE_DATA_PREPARED: + if (lm_ctx->migration.iter.state != VFIO_USER_MIGRATION_ITERATION_STATE_STARTED && + lm_ctx->migration.iter.state != VFIO_USER_MIGRATION_ITERATION_STATE_DATA_PREPARED) { + lm_log(lm_ctx, LM_ERR, "reading data_size ouf of sequence is undefined"); + return -EINVAL; + } + return 0; +} + +static ssize_t +handle_migration_data_size_when_resuming(lm_ctx_t *lm_ctx, __u64 size, + bool is_write) +{ + int ret = 0; + + assert(lm_ctx != NULL); + + if (is_write) { + ret = lm_ctx->migration.callbacks.data_written(lm_ctx->pvt, + size, + lm_ctx->migration.info.data_offset); + lm_ctx->migration.info.data_size = size; + 
lm_ctx->migration.info.data_offset += size; + } + return ret; +} + +static ssize_t +handle_migration_data_size(lm_ctx_t *lm_ctx, __u64 *size, bool is_write) +{ + int ret; + + assert(lm_ctx != NULL); + assert(size != NULL); + + switch (lm_ctx->migration.info.device_state){ + case VFIO_DEVICE_STATE_SAVING: + case VFIO_DEVICE_STATE_RUNNING | VFIO_DEVICE_STATE_SAVING: + ret = handle_migration_data_size_when_saving(lm_ctx, is_write); + break; + case VFIO_DEVICE_STATE_RESUMING: + ret = handle_migration_data_size_when_resuming(lm_ctx, *size, is_write); break; default: /* TODO improve error message */ @@ -1376,12 +1442,60 @@ handle_migration_data_size(lm_ctx_t *lm_ctx, __u64 *size, bool is_write) ret = -EINVAL; } - *size = lm_ctx->migration.iter.size; + if (ret == 0 && !is_write) { + *size = lm_ctx->migration.iter.size; + } return 0; } static ssize_t +handle_migration_region_access_registers(lm_ctx_t *lm_ctx, char *buf, + size_t count, loff_t pos, + bool is_write) +{ + int ret; + + assert(lm_ctx != NULL); + + switch (pos) { + case offsetof(struct vfio_device_migration_info, device_state): + if (count != sizeof(lm_ctx->migration.info.device_state)) { + lm_log(lm_ctx, LM_ERR, "bad device_state access size %d", count); + return -EINVAL; + } + ret = handle_migration_device_state(lm_ctx, (__u32*)buf, is_write); + break; + case offsetof(struct vfio_device_migration_info, pending_bytes): + if (count != sizeof(lm_ctx->migration.info.pending_bytes)) { + lm_log(lm_ctx, LM_ERR, "bad pending_bytes access size %d", count); + return -EINVAL; + } + ret = handle_migration_pending_bytes(lm_ctx, (__u64*)buf, is_write); + break; + case offsetof(struct vfio_device_migration_info, data_offset): + if (count != sizeof(lm_ctx->migration.info.data_offset)) { + lm_log(lm_ctx, LM_ERR, "bad data_offset access size %d", count); + return -EINVAL; + } + ret = handle_migration_data_offset(lm_ctx, (__u64*)buf, is_write); + break; + case offsetof(struct vfio_device_migration_info, data_size): + if (count 
!= sizeof(lm_ctx->migration.info.data_size)) { + lm_log(lm_ctx, LM_ERR, "bad data_size access size %d", count); + return -EINVAL; + } + ret = handle_migration_data_size(lm_ctx, (__u64*)buf, is_write); + break; + default: + lm_log(lm_ctx, LM_ERR, "bad migration region register offset %#lx", + pos); + return -EINVAL; + } + return ret; +} + +static ssize_t handle_migration_region_access(lm_ctx_t *lm_ctx, char *buf, size_t count, loff_t pos, bool is_write) { @@ -1395,42 +1509,19 @@ handle_migration_region_access(lm_ctx_t *lm_ctx, char *buf, size_t count, pos, pos + count - 1); return -EINVAL; } - switch (pos) { - case offsetof(struct vfio_device_migration_info, device_state): - if (count != sizeof(lm_ctx->migration.info.device_state)) { - return -EINVAL; - } - ret = handle_migration_device_state(lm_ctx, (__u32*)buf, - is_write); - break; - case offsetof(struct vfio_device_migration_info, pending_bytes): - if (count != sizeof(lm_ctx->migration.info.pending_bytes)) { - return -EINVAL; - } - ret = handle_migration_pending_bytes(lm_ctx, (__u64*)buf, is_write); - break; - case offsetof(struct vfio_device_migration_info, data_offset): - if (count != sizeof(lm_ctx->migration.info.data_offset)) { - return -EINVAL; - } - ret = handle_migration_data_offset(lm_ctx, (__u64*)buf, is_write); - break; - case offsetof(struct vfio_device_migration_info, data_size): - if (count != sizeof(lm_ctx->migration.info.data_size)) { - return -EINVAL; - } - ret = handle_migration_data_size(lm_ctx, (__u64*)buf, is_write); - break; - default: - if (is_write) { - /* FIXME how do we handle the offset? 
*/ - ret = lm_ctx->migration.callbacks.write_data(lm_ctx->pvt, - buf, count); - } else { - ret = lm_ctx->migration.callbacks.read_data(lm_ctx->pvt, - buf, count, - pos - sizeof(struct vfio_device_migration_info)); - } + + if (pos + count <= sizeof(struct vfio_device_migration_info)) { + ret = handle_migration_region_access_registers(lm_ctx, buf, count, + pos, is_write); + } else { + pos -= sizeof(struct vfio_device_migration_info); + if (is_write) { + ret = lm_ctx->migration.callbacks.write_data(lm_ctx->pvt, + buf, count, pos); + } else { + ret = lm_ctx->migration.callbacks.read_data(lm_ctx->pvt, + buf, count, pos); + } } if (ret == 0) { diff --git a/samples/client.c b/samples/client.c index e8a5005..d29df01 100644 --- a/samples/client.c +++ b/samples/client.c @@ -39,6 +39,7 @@ #include <time.h> #include <err.h> #include <assert.h> +#include <sys/stat.h> #include "../lib/muser.h" #include "../lib/muser_priv.h" @@ -365,28 +366,28 @@ wait_for_irqs(int sock, int irq_fd) printf("INTx messaged triggered!\n"); } -static int -access_bar0(int sock, int irq_fd) +static void +access_bar0(int sock, int irq_fd, time_t *t) { - time_t t = 1; - int ret = access_region(sock, LM_DEV_BAR0_REG_IDX, true, 0, &t, sizeof t); + int ret; + + assert(t != NULL); + ret = access_region(sock, LM_DEV_BAR0_REG_IDX, true, 0, t, sizeof *t); if (ret < 0) { errx(EXIT_FAILURE, "failed to write to BAR0: %s", strerror(-ret)); } - printf("wrote to BAR0: %ld\n", t); + printf("wrote to BAR0: %ld\n", *t); - ret = access_region(sock, LM_DEV_BAR0_REG_IDX, false, 0, &t, sizeof t); + ret = access_region(sock, LM_DEV_BAR0_REG_IDX, false, 0, t, sizeof *t); if (ret < 0) { errx(EXIT_FAILURE, "failed to read from BAR0: %s", strerror(-ret)); } - printf("read from BAR0: %ld\n", t); + printf("read from BAR0: %ld\n", *t); - ret = wait_for_irqs(sock, irq_fd); - - return 0; + wait_for_irqs(sock, irq_fd); } static void @@ -561,7 +562,6 @@ migrate_from(int sock, void **data, __u64 *len) { __u32 device_state = 
VFIO_DEVICE_STATE_SAVING; __u64 pending_bytes, data_offset, data_size; - void *data; /* XXX set device state to stop-and-copy */ int ret = access_region(sock, LM_DEV_MIGRATION_REG_IDX, true, @@ -581,6 +581,21 @@ migrate_from(int sock, void **data, __u64 *len) strerror(-ret)); } + /* We do expect some migration data. */ + assert(pending_bytes > 0); + + /* + * The only expectation about pending_bytes is whether it's zero or + * non-zero, therefore it must be considered volatile, even across + * iterations. In the sample server we know it's static so it's fairly + * straightforward. + */ + *len = pending_bytes; + *data = malloc(*len); + if (*data == NULL) { + err(EXIT_FAILURE, "failed to allocate migration buffer"); + } + while (pending_bytes > 0) { /* XXX read data_offset and data_size */ @@ -600,20 +615,18 @@ migrate_from(int sock, void **data, __u64 *len) strerror(-ret)); } + assert(data_offset - sizeof(struct vfio_device_migration_info) + data_size <= *len); + /* XXX read migration data */ - data = malloc(data_size); - if (data == NULL) { - return -errno; - } ret = access_region(sock, LM_DEV_MIGRATION_REG_IDX, false, data_offset, - data, data_size); + (char*)*data + data_offset - sizeof(struct vfio_device_migration_info), + data_size); if (ret < 0) { errx(EXIT_FAILURE, "failed to read migration data: %s", strerror(-ret)); } /* FIXME send migration data to the destination client process */ - printf("XXX migration: %#llx bytes worth of data\n", data_size); /* * XXX read pending_bytes again to indicate to the sever that the @@ -637,8 +650,126 @@ migrate_from(int sock, void **data, __u64 *len) errx(EXIT_FAILURE, "failed to write to device state: %s", strerror(-ret)); } +} - return 0; +static int +migrate_to(char *old_sock_path, int client_max_fds, int *server_max_fds, + size_t *pgsize, void *migr_data, __u64 migr_data_len) +{ + int ret, sock; + char *sock_path; + struct stat sb; + __u32 device_state = VFIO_DEVICE_STATE_RESUMING; + __u64 data_offset; + 
assert(old_sock_path != NULL); + + ret = asprintf(&sock_path, "%s_migrated", old_sock_path); + if (ret == -1) { + err(EXIT_FAILURE, "failed to asprintf"); + } + + ret = fork(); + if (ret == -1) { + err(EXIT_FAILURE, "failed to fork"); + } + if (ret > 0) { /* child (destination server) */ + char *_argv[] = { + "build/dbg/samples/server", + "-v", + sock_path, + NULL + }; + ret = execvp(_argv[0] , _argv); + if (ret != 0) { + err(EXIT_FAILURE, "failed to start destination sever"); + } + } + + /* parent (client) */ + + /* wait for the server to come up */ + while (stat(sock_path, &sb) == -1) { + if (errno != ENOENT) { + err(EXIT_FAILURE, "failed to stat %s", sock_path); + } + } + if ((sb.st_mode & S_IFMT) != S_IFSOCK) { + errx(EXIT_FAILURE, "%s: not a socket", sock_path); + } + + /* connect to the destination server */ + sock = init_sock(sock_path); + + set_version(sock, client_max_fds, server_max_fds, pgsize); + + /* XXX set device state to resuming */ + ret = access_region(sock, LM_DEV_MIGRATION_REG_IDX, true, + offsetof(struct vfio_device_migration_info, device_state), + &device_state, sizeof(device_state)); + if (ret < 0) { + errx(EXIT_FAILURE, "failed to set device state to resuming: %s", + strerror(-ret)); + } + + /* XXX read data offset */ + ret = access_region(sock, LM_DEV_MIGRATION_REG_IDX, false, + offsetof(struct vfio_device_migration_info, data_offset), + &data_offset, sizeof(data_offset)); + if (ret < 0) { + errx(EXIT_FAILURE, "failed to read data offset: %s", strerror(-ret)); + } + + /* XXX write migration data */ + + /* + * TODO write half of migration data via regular write and other half via + * memory map. 
+ */ + ret = access_region(sock, LM_DEV_MIGRATION_REG_IDX, true, + data_offset, migr_data, migr_data_len); + if (ret < 0) { + errx(EXIT_FAILURE, "failed to write migration data: %s", + strerror(-ret)); + } + + /* XXX write data_size */ + ret = access_region(sock, LM_DEV_MIGRATION_REG_IDX, true, + offsetof(struct vfio_device_migration_info, data_size), + &migr_data_len, sizeof migr_data_len); + if (ret < 0) { + errx(EXIT_FAILURE, "failed to write data size: %s", strerror(-ret)); + } + + /* XXX set device state to running */ + device_state = VFIO_DEVICE_STATE_RUNNING; + ret = access_region(sock, LM_DEV_MIGRATION_REG_IDX, true, + offsetof(struct vfio_device_migration_info, device_state), + &device_state, sizeof(device_state)); + if (ret < 0) { + errx(EXIT_FAILURE, "failed to set device state to running: %s", + strerror(-ret)); + } + + return sock; +} + +static void +map_dma_regions(int sock, int max_fds, struct vfio_user_dma_region *dma_regions, + int *dma_region_fds, int nr_dma_regions) +{ + int i, ret; + + for (i = 0; i < nr_dma_regions / max_fds; i++) { + ret = send_recv_vfio_user_msg(sock, i, VFIO_USER_DMA_MAP, + dma_regions + (i * max_fds), + sizeof(*dma_regions) * max_fds, + dma_region_fds + (i * max_fds), + max_fds, NULL, NULL, 0); + if (ret < 0) { + errx(EXIT_FAILURE, "failed to map DMA regions: %s", strerror(-ret)); + } + } } int main(int argc, char *argv[]) @@ -656,23 +787,15 @@ int main(int argc, char *argv[]) int nr_dma_regions; struct vfio_iommu_type1_dirty_bitmap dirty_bitmap = {0}; int opt; - enum migration migration = NO_MIGRATION; + time_t t; + void *migr_data; + __u64 migr_data_len; - while ((opt = getopt(argc, argv, "hm:")) != -1) { + while ((opt = getopt(argc, argv, "h")) != -1) { switch (opt) { case 'h': usage(argv[0]); exit(EXIT_SUCCESS); - case 'm': - if (strcmp(optarg, "src") == 0) { - migration = MIGRATION_SOURCE; - } else if (strcmp(optarg, "dst") == 0) { - migration = MIGRATION_DESTINATION; - } else { - fprintf(stderr, "invalid migration 
argument %s\n", optarg); - exit(EXIT_FAILURE); - } - break; default: usage(argv[0]); exit(EXIT_FAILURE); @@ -702,8 +825,6 @@ int main(int argc, char *argv[]) ret); } - - /* XXX VFIO_USER_DEVICE_GET_INFO */ get_device_info(sock, &client_dev_info); @@ -742,17 +863,8 @@ int main(int argc, char *argv[]) dma_region_fds[i] = fileno(fp); } - for (i = 0; i < nr_dma_regions / server_max_fds; i++, msg_id++) { - ret = send_recv_vfio_user_msg(sock, msg_id, VFIO_USER_DMA_MAP, - dma_regions + (i * server_max_fds), - sizeof(*dma_regions) * server_max_fds, - dma_region_fds + (i * server_max_fds), - server_max_fds, NULL, NULL, 0); - if (ret < 0) { - fprintf(stderr, "failed to map DMA regions: %s\n", strerror(-ret)); - return ret; - } - } + map_dma_regions(sock, server_max_fds, dma_regions, dma_region_fds, + nr_dma_regions); /* * XXX VFIO_USER_DEVICE_GET_IRQ_INFO and VFIO_IRQ_SET_ACTION_TRIGGER @@ -776,11 +888,10 @@ int main(int argc, char *argv[]) * BAR0 in the server does not support memory mapping so it must be accessed * via explicit messages. */ - ret = access_bar0(sock, irq_fd); - if (ret < 0) { - fprintf(stderr, "failed to access BAR0: %s\n", strerror(-ret)); - exit(EXIT_FAILURE); - } + t = time(NULL) + 1; + access_bar0(sock, irq_fd, &t); + + /* FIXME check that above took at least 1s */ handle_dma_io(sock, dma_regions, nr_dma_regions, dma_region_fds); @@ -796,20 +907,9 @@ int main(int argc, char *argv[]) strerror(-ret)); } - /* - * FIXME now that region read/write works, change the server implementation - * to trigger an interrupt after N seconds, where N is the value written to - * BAR0 by the client. 
- */ - /* BAR1 can be memory mapped and read directly */ /* - * TODO implement the following: write a value in BAR1, a server timer will - * increase it every second (SIGALARM) - */ - - /* * XXX VFIO_USER_DMA_UNMAP * * unmap the first group of the DMA regions @@ -821,10 +921,68 @@ int main(int argc, char *argv[]) errx(EXIT_FAILURE, "failed to unmap DMA regions: %s", strerror(-ret)); } - if (migration == MIGRATION_SOURCE) { - ret = migrate_from(sock); + /* + * Schedule an interrupt in 2 seconds from now in the old server and then + * immediately migrate the device. The new server should deliver the + * interrupt. Hopefully 2 seconds should be enough for migration to finish. + * TODO make this value a command line option. + */ + t = time(NULL) + 2; + ret = access_region(sock, LM_DEV_BAR0_REG_IDX, true, 0, &t, sizeof t); + if (ret < 0) { + errx(EXIT_FAILURE, "failed to write to BAR0: %s", strerror(-ret)); + } + + /* + * By sleeping here for 1s after migration finishes on the source server + * (but not yet started on the destination server), the timer should be + * armed on the destination server for 2-1=1 seconds. If we don't sleep + * then it will be armed for 2 seconds, which isn't as interesting. + */ + sleep(1); + + migrate_from(sock, &migr_data, &migr_data_len); + + /* + * Normally the client would now send the device state to the destination + * client and then exit. We don't demonstrate how this works as this is a + * client implementation detail. Instead, the client starts the destination + * server and then applies the migration data. + */ + + sock = migrate_to(argv[optind], client_max_fds, &server_max_fds, &pgsize, + migr_data, migr_data_len); + + /* + * Now we must reconfigure the destination server. + */ + + /* + * XXX reconfigure DMA regions, note that the first half of them has been + * unmapped. 
+ */ + map_dma_regions(sock, server_max_fds, dma_regions + server_max_fds, + dma_region_fds + server_max_fds, + nr_dma_regions - server_max_fds); + + /* + * XXX reconfigure IRQs. + * FIXME is this something the client needs to do? I would expect so since + * it's the client that creates and provides the FD. Do we need to save some + * state in the migration data? + */ + ret = configure_irqs(sock); + if (ret < 0) { + errx(EXIT_FAILURE, "failed to configure IRQs on destination server: %s", + strerror(-ret)); } + irq_fd = ret; + + wait_for_irqs(sock, irq_fd); + handle_dma_io(sock, dma_regions + server_max_fds, + nr_dma_regions - server_max_fds, + dma_region_fds + server_max_fds); return 0; } diff --git a/samples/server.c b/samples/server.c index 92f3312..5dcda46 100644 --- a/samples/server.c +++ b/samples/server.c @@ -59,9 +59,11 @@ struct server_data { uint8_t *bar1; struct dma_regions regions[NR_DMA_REGIONS]; struct { - int fake_internal_state; __u64 pending_bytes; __u64 data_size; + void *migr_data; + size_t migr_data_len; + lm_migr_state_t state; } migration; }; @@ -71,7 +73,19 @@ _log(UNUSED void *pvt, UNUSED lm_log_lvl_t lvl, char const *msg) fprintf(stderr, "server: %s\n", msg); } -/* returns time in seconds since Epoch */ +static int +arm_timer(struct server_data *server_data, time_t t) +{ + struct itimerval new = {.it_value.tv_sec = t - time(NULL) }; + lm_log(server_data->lm_ctx, LM_DBG, + "arming timer to trigger in %d seconds", new.it_value.tv_sec); + if (setitimer(ITIMER_REAL, &new, NULL) != 0) { + lm_log(server_data->lm_ctx, LM_ERR, "failed to arm timer: %m"); + return -errno; + } + return 0; +} + ssize_t bar0_access(void *pvt, char * const buf, size_t count, loff_t offset, const bool is_write) @@ -79,17 +93,18 @@ bar0_access(void *pvt, char * const buf, size_t count, loff_t offset, struct server_data *server_data = pvt; if (count != sizeof(time_t) || offset != 0) { + lm_log(server_data->lm_ctx, LM_ERR, "bad BAR0 access %#lx-%#lx", + offset, offset + 
count - 1); errno = EINVAL; return -1; } if (is_write) { - struct itimerval new = {.it_value.tv_sec = *(time_t*)buf}; - lm_log(server_data->lm_ctx, LM_DBG, - "arming timer to trigger in %d seconds", new.it_value.tv_sec); - if (setitimer(ITIMER_REAL, &new, NULL) != 0) { - lm_log(server_data->lm_ctx, LM_ERR, "failed to arm timer: %m"); - return -1; + if (server_data->migration.state == LM_MIGR_STATE_RUNNING) { + int ret = arm_timer(server_data, *(time_t*)buf); + if (ret < 0) { + return ret; + } } memcpy(&server_data->bar0, buf, count); } else { @@ -230,6 +245,7 @@ static int device_reset(UNUSED void *pvt) static int migration_device_state_transition(void *pvt, lm_migr_state_t state) { + int ret; struct server_data *server_data = pvt; printf("migration: transition to device state %d\n", state); @@ -242,9 +258,18 @@ migration_device_state_transition(void *pvt, lm_migr_state_t state) case LM_MIGR_STATE_STOP: assert(server_data->migration.pending_bytes == 0); break; + case LM_MIGR_STATE_RESUME: + break; + case LM_MIGR_STATE_RUNNING: + ret = arm_timer(server_data, server_data->bar0); + if (ret < 0) { + return ret; + } + break; default: assert(false); /* FIXME */ } + server_data->migration.state = state; return 0; } @@ -265,27 +290,76 @@ migration_prepare_data(void *pvt, __u64 *offset, __u64 *size) struct server_data *server_data = pvt; *offset = 0; - *size = server_data->migration.data_size = MIN(server_data->migration.pending_bytes, sysconf(_SC_PAGESIZE) / 4); + *size = server_data->migration.data_size = MIN(server_data->migration.pending_bytes, server_data->migration.migr_data_len / 4); return 0; } static size_t -migration_read_data(void *pvt, UNUSED void *buf, __u64 size, - UNUSED __u64 offset) +migration_read_data(void *pvt, void *buf, __u64 size, __u64 offset) { struct server_data *server_data = pvt; if (server_data->migration.data_size < size) { - assert(false); + lm_log(server_data->lm_ctx, LM_ERR, + "invalid migration data read %#lx-%#lx", + offset, offset + size 
- 1); + return -EINVAL; } - return 0; + /* FIXME implement, client should be able to write any byte range */ + assert((offset == 0 && size >= sizeof server_data->bar0) + || offset >= sizeof server_data->bar0); + + if (offset == 0 && size >= sizeof server_data->bar0) { + memcpy(buf, &server_data->bar0, sizeof server_data->bar0); + } + return size; } static size_t -migration_write_data(UNUSED void *pvt, UNUSED void *data, UNUSED __u64 size) +migration_write_data(void *pvt, void *data, __u64 size, __u64 offset) { - assert(false); + struct server_data *server_data = pvt; + + assert(server_data != NULL); + assert(data != NULL); + + if (offset + size > server_data->migration.migr_data_len) { + lm_log(server_data->lm_ctx, LM_ERR, + "invalid write %#llx-%#llx", offset, offset + size - 1); + } + + memcpy(server_data->migration.migr_data + offset, data, size); + + return 0; +} + + +static int +migration_data_written(void *pvt, __u64 count, __u64 offset) +{ + int ret; + struct server_data *server_data = pvt; + + assert(server_data != NULL); + + if (offset + count > server_data->migration.migr_data_len) { + lm_log(server_data->lm_ctx, LM_ERR, + "bad migration data range %#llx-%#llx", + offset, offset + count - 1); + return -EINVAL; + } + + if (offset == 0 && count >= sizeof server_data->bar0) { + + /* apply device state */ + /* FIXME must arm timer only after device is resumed!!! 
*/ + ret = bar0_access(pvt, server_data->migration.migr_data, + sizeof server_data->bar0, 0, true); + if (ret < 0) { + return ret; + } + } return 0; } @@ -295,7 +369,13 @@ int main(int argc, char *argv[]){ bool verbose = false; char opt; struct sigaction act = {.sa_handler = _sa_handler}; - struct server_data server_data = {0}; + struct server_data server_data = { + .migration = { + /* one page so that we can memory map it */ + .migr_data_len = sysconf(_SC_PAGESIZE), + .state = LM_MIGR_STATE_RUNNING + } + }; int nr_sparse_areas = 2, size = 1024, i; struct lm_sparse_mmap_areas *sparse_areas; lm_ctx_t *lm_ctx; @@ -349,7 +429,12 @@ int main(int argc, char *argv[]){ }, .reg_info[LM_DEV_MIGRATION_REG_IDX] = { /* migration region */ .flags = LM_REG_FLAG_RW, - .size = sizeof(struct vfio_device_migration_info) + sysconf(_SC_PAGESIZE), + /* + * FIXME don't declare support for migration via a region, this + * is a VFIO artifact, make it something different. We still + * have to make the migration data memory mappable. + */ + .size = sizeof(struct vfio_device_migration_info) + server_data.migration.migr_data_len, .mmap_areas = sparse_areas, }, .irq_count[LM_DEV_INTX_IRQ_IDX] = 1, @@ -364,6 +449,7 @@ int main(int argc, char *argv[]){ .get_pending_bytes = &migration_get_pending_bytes, .prepare_data = &migration_prepare_data, .read_data = &migration_read_data, + .data_written = &migration_data_written, .write_data = &migration_write_data } }; @@ -378,6 +464,12 @@ int main(int argc, char *argv[]){ err(EXIT_FAILURE, "failed to initialize device emulation\n"); } + server_data.migration.migr_data = aligned_alloc(server_data.migration.migr_data_len, + server_data.migration.migr_data_len); + if (server_data.migration.migr_data == NULL) { + errx(EXIT_FAILURE, "failed to allocate migration data"); + } + do { ret = lm_ctx_drive(lm_ctx); if (ret == -EINTR) { |