aboutsummaryrefslogtreecommitdiff
path: root/samples
diff options
context:
space:
mode:
author Thanos Makatos <thanos.makatos@nutanix.com> 2020-11-18 11:08:58 -0500
committer Thanos <tmakatos@gmail.com> 2020-11-18 16:50:58 +0000
commit f8addbe68205bf0ed98b60a715cf84a27fb3799a (patch)
tree 5a3616ae71c58e8a1fe1ecba6bac5ce1d62a7669 /samples
parent a4b39947ea5b7108540fa80354758f81c90d33d1 (diff)
downloadlibvfio-user-f8addbe68205bf0ed98b60a715cf84a27fb3799a.zip
libvfio-user-f8addbe68205bf0ed98b60a715cf84a27fb3799a.tar.gz
libvfio-user-f8addbe68205bf0ed98b60a715cf84a27fb3799a.tar.bz2
implement migration resuming on destination server
This patch implements resuming on the destination server. We also demonstrate how to do this in the client/server sample. Signed-off-by: Thanos Makatos <thanos.makatos@nutanix.com>
Diffstat (limited to 'samples')
-rw-r--r-- samples/client.c 280
-rw-r--r-- samples/server.c 126
2 files changed, 328 insertions, 78 deletions
diff --git a/samples/client.c b/samples/client.c
index e8a5005..d29df01 100644
--- a/samples/client.c
+++ b/samples/client.c
@@ -39,6 +39,7 @@
#include <time.h>
#include <err.h>
#include <assert.h>
+#include <sys/stat.h>
#include "../lib/muser.h"
#include "../lib/muser_priv.h"
@@ -365,28 +366,28 @@ wait_for_irqs(int sock, int irq_fd)
printf("INTx messaged triggered!\n");
}
-static int
-access_bar0(int sock, int irq_fd)
+static void
+access_bar0(int sock, int irq_fd, time_t *t)
{
- time_t t = 1;
- int ret = access_region(sock, LM_DEV_BAR0_REG_IDX, true, 0, &t, sizeof t);
+ int ret;
+
+ assert(t != NULL);
+ ret = access_region(sock, LM_DEV_BAR0_REG_IDX, true, 0, t, sizeof *t);
if (ret < 0) {
errx(EXIT_FAILURE, "failed to write to BAR0: %s", strerror(-ret));
}
- printf("wrote to BAR0: %ld\n", t);
+ printf("wrote to BAR0: %ld\n", *t);
- ret = access_region(sock, LM_DEV_BAR0_REG_IDX, false, 0, &t, sizeof t);
+ ret = access_region(sock, LM_DEV_BAR0_REG_IDX, false, 0, t, sizeof *t);
if (ret < 0) {
errx(EXIT_FAILURE, "failed to read from BAR0: %s", strerror(-ret));
}
- printf("read from BAR0: %ld\n", t);
+ printf("read from BAR0: %ld\n", *t);
- ret = wait_for_irqs(sock, irq_fd);
-
- return 0;
+ wait_for_irqs(sock, irq_fd);
}
static void
@@ -561,7 +562,6 @@ migrate_from(int sock, void **data, __u64 *len)
{
__u32 device_state = VFIO_DEVICE_STATE_SAVING;
__u64 pending_bytes, data_offset, data_size;
- void *data;
/* XXX set device state to stop-and-copy */
int ret = access_region(sock, LM_DEV_MIGRATION_REG_IDX, true,
@@ -581,6 +581,21 @@ migrate_from(int sock, void **data, __u64 *len)
strerror(-ret));
}
+ /* We do expect some migration data. */
+ assert(pending_bytes > 0);
+
+ /*
+ * The only expectation about pending_bytes is whether it's zero or
+ * non-zero, therefore it must be considered volatile, even across
+ * iterations. In the sample server we know it's static so it's fairly
+ * straightforward.
+ */
+ *len = pending_bytes;
+ *data = malloc(*len);
+ if (*data == NULL) {
+ err(EXIT_FAILURE, "failed to allocate migration buffer");
+ }
+
while (pending_bytes > 0) {
/* XXX read data_offset and data_size */
@@ -600,20 +615,18 @@ migrate_from(int sock, void **data, __u64 *len)
strerror(-ret));
}
+ assert(data_offset - sizeof(struct vfio_device_migration_info) + data_size <= *len);
+
/* XXX read migration data */
- data = malloc(data_size);
- if (data == NULL) {
- return -errno;
- }
ret = access_region(sock, LM_DEV_MIGRATION_REG_IDX, false, data_offset,
- data, data_size);
+ (char*)*data + data_offset - sizeof(struct vfio_device_migration_info),
+ data_size);
if (ret < 0) {
errx(EXIT_FAILURE, "failed to read migration data: %s",
strerror(-ret));
}
/* FIXME send migration data to the destination client process */
- printf("XXX migration: %#llx bytes worth of data\n", data_size);
/*
* XXX read pending_bytes again to indicate to the server that the
@@ -637,8 +650,126 @@ migrate_from(int sock, void **data, __u64 *len)
errx(EXIT_FAILURE, "failed to write to device state: %s",
strerror(-ret));
}
+}
- return 0;
+/*
+ * Starts the destination server, connects to it and resumes the device from
+ * previously saved migration data.
+ *
+ * The destination server is executed in a forked child process and is given
+ * the socket path "<old_sock_path>_migrated". The client then negotiates the
+ * version, puts the device in the resuming state, writes the migration data at
+ * the offset reported by the server, writes the data size and finally sets
+ * the device state to running.
+ *
+ * @old_sock_path: socket path of the source server, must not be NULL
+ * @client_max_fds, @server_max_fds, @pgsize: passed as-is to set_version()
+ * @migr_data: migration data previously read from the source server
+ * @migr_data_len: size of @migr_data in bytes
+ *
+ * Returns the socket connected to the destination server. Any failure
+ * terminates the process via err()/errx().
+ */
+static int
+migrate_to(char *old_sock_path, int client_max_fds, int *server_max_fds,
+           size_t *pgsize, void *migr_data, __u64 migr_data_len)
+{
+    int ret, sock;
+    char *sock_path;
+    struct stat sb;
+    __u32 device_state = VFIO_DEVICE_STATE_RESUMING;
+    __u64 data_offset;
+    /* back-off between polls for the destination server's socket (10ms) */
+    const struct timespec poll_delay = { .tv_sec = 0, .tv_nsec = 10000000L };
+
+    assert(old_sock_path != NULL);
+
+    ret = asprintf(&sock_path, "%s_migrated", old_sock_path);
+    if (ret == -1) {
+        err(EXIT_FAILURE, "failed to asprintf");
+    }
+
+    ret = fork();
+    if (ret == -1) {
+        err(EXIT_FAILURE, "failed to fork");
+    }
+    if (ret == 0) { /* child (destination server): fork() returns 0 here */
+        char *_argv[] = {
+            "build/dbg/samples/server",
+            "-v",
+            sock_path,
+            NULL
+        };
+        /* execvp() only returns if it failed */
+        execvp(_argv[0], _argv);
+        err(EXIT_FAILURE, "failed to start destination sever");
+    }
+
+    /* parent (client) */
+
+    /*
+     * Wait for the server to come up: poll for the socket file, sleeping
+     * between attempts so that we don't spin on stat().
+     */
+    while (stat(sock_path, &sb) == -1) {
+        if (errno != ENOENT) {
+            err(EXIT_FAILURE, "failed to stat %s", sock_path);
+        }
+        (void)nanosleep(&poll_delay, NULL);
+    }
+    if ((sb.st_mode & S_IFMT) != S_IFSOCK) {
+        errx(EXIT_FAILURE, "%s: not a socket", sock_path);
+    }
+
+    /* connect to the destination server */
+    sock = init_sock(sock_path);
+
+    /* the path was allocated by asprintf() and is not needed past this point */
+    free(sock_path);
+
+    set_version(sock, client_max_fds, server_max_fds, pgsize);
+
+    /* XXX set device state to resuming */
+    ret = access_region(sock, LM_DEV_MIGRATION_REG_IDX, true,
+                        offsetof(struct vfio_device_migration_info, device_state),
+                        &device_state, sizeof(device_state));
+    if (ret < 0) {
+        errx(EXIT_FAILURE, "failed to set device state to resuming: %s",
+             strerror(-ret));
+    }
+
+    /* XXX read data offset */
+    ret = access_region(sock, LM_DEV_MIGRATION_REG_IDX, false,
+                        offsetof(struct vfio_device_migration_info, data_offset),
+                        &data_offset, sizeof(data_offset));
+    if (ret < 0) {
+        errx(EXIT_FAILURE, "failed to read data offset: %s", strerror(-ret));
+    }
+
+    /* XXX write migration data */
+
+    /*
+     * TODO write half of migration data via regular write and other half via
+     * memory map.
+     */
+    ret = access_region(sock, LM_DEV_MIGRATION_REG_IDX, true,
+                        data_offset, migr_data, migr_data_len);
+    if (ret < 0) {
+        errx(EXIT_FAILURE, "failed to write migration data: %s",
+             strerror(-ret));
+    }
+
+    /* XXX write data_size */
+    ret = access_region(sock, LM_DEV_MIGRATION_REG_IDX, true,
+                        offsetof(struct vfio_device_migration_info, data_size),
+                        &migr_data_len, sizeof migr_data_len);
+    if (ret < 0) {
+        errx(EXIT_FAILURE, "failed to write data size: %s", strerror(-ret));
+    }
+
+    /* XXX set device state to running */
+    device_state = VFIO_DEVICE_STATE_RUNNING;
+    ret = access_region(sock, LM_DEV_MIGRATION_REG_IDX, true,
+                        offsetof(struct vfio_device_migration_info, device_state),
+                        &device_state, sizeof(device_state));
+    if (ret < 0) {
+        errx(EXIT_FAILURE, "failed to set device state to running: %s",
+             strerror(-ret));
+    }
+
+    return sock;
+}
+
+/*
+ * Maps DMA regions in the server by sending VFIO_USER_DMA_MAP messages, each
+ * carrying at most @max_fds regions together with their file descriptors.
+ *
+ * A trailing batch smaller than @max_fds is sent as well, so that no region is
+ * silently left unmapped when @nr_dma_regions is not a multiple of @max_fds.
+ *
+ * @sock: socket connected to the server
+ * @max_fds: maximum number of regions/file descriptors per message, must be
+ *           positive
+ * @dma_regions: array of @nr_dma_regions regions to map
+ * @dma_region_fds: array of @nr_dma_regions file descriptors, one per region
+ * @nr_dma_regions: number of regions; nothing is sent if it is not positive
+ *
+ * Terminates the process via errx() on failure.
+ */
+static void
+map_dma_regions(int sock, int max_fds, struct vfio_user_dma_region *dma_regions,
+                int *dma_region_fds, int nr_dma_regions)
+{
+    int done, msg_id, ret;
+
+    if (max_fds <= 0) {
+        errx(EXIT_FAILURE, "invalid maximum number of file descriptors: %d",
+             max_fds);
+    }
+
+    /* the message ID is the batch index, starting from zero */
+    for (done = 0, msg_id = 0; done < nr_dma_regions; done += max_fds, msg_id++) {
+        int batch = nr_dma_regions - done;
+
+        if (batch > max_fds) {
+            batch = max_fds;
+        }
+        ret = send_recv_vfio_user_msg(sock, msg_id, VFIO_USER_DMA_MAP,
+                                      dma_regions + done,
+                                      sizeof(*dma_regions) * batch,
+                                      dma_region_fds + done,
+                                      batch, NULL, NULL, 0);
+        if (ret < 0) {
+            errx(EXIT_FAILURE, "failed to map DMA regions: %s", strerror(-ret));
+        }
+    }
}
int main(int argc, char *argv[])
@@ -656,23 +787,15 @@ int main(int argc, char *argv[])
int nr_dma_regions;
struct vfio_iommu_type1_dirty_bitmap dirty_bitmap = {0};
int opt;
- enum migration migration = NO_MIGRATION;
+ time_t t;
+ void *migr_data;
+ __u64 migr_data_len;
- while ((opt = getopt(argc, argv, "hm:")) != -1) {
+ while ((opt = getopt(argc, argv, "h")) != -1) {
switch (opt) {
case 'h':
usage(argv[0]);
exit(EXIT_SUCCESS);
- case 'm':
- if (strcmp(optarg, "src") == 0) {
- migration = MIGRATION_SOURCE;
- } else if (strcmp(optarg, "dst") == 0) {
- migration = MIGRATION_DESTINATION;
- } else {
- fprintf(stderr, "invalid migration argument %s\n", optarg);
- exit(EXIT_FAILURE);
- }
- break;
default:
usage(argv[0]);
exit(EXIT_FAILURE);
@@ -702,8 +825,6 @@ int main(int argc, char *argv[])
ret);
}
-
-
/* XXX VFIO_USER_DEVICE_GET_INFO */
get_device_info(sock, &client_dev_info);
@@ -742,17 +863,8 @@ int main(int argc, char *argv[])
dma_region_fds[i] = fileno(fp);
}
- for (i = 0; i < nr_dma_regions / server_max_fds; i++, msg_id++) {
- ret = send_recv_vfio_user_msg(sock, msg_id, VFIO_USER_DMA_MAP,
- dma_regions + (i * server_max_fds),
- sizeof(*dma_regions) * server_max_fds,
- dma_region_fds + (i * server_max_fds),
- server_max_fds, NULL, NULL, 0);
- if (ret < 0) {
- fprintf(stderr, "failed to map DMA regions: %s\n", strerror(-ret));
- return ret;
- }
- }
+ map_dma_regions(sock, server_max_fds, dma_regions, dma_region_fds,
+ nr_dma_regions);
/*
* XXX VFIO_USER_DEVICE_GET_IRQ_INFO and VFIO_IRQ_SET_ACTION_TRIGGER
@@ -776,11 +888,10 @@ int main(int argc, char *argv[])
* BAR0 in the server does not support memory mapping so it must be accessed
* via explicit messages.
*/
- ret = access_bar0(sock, irq_fd);
- if (ret < 0) {
- fprintf(stderr, "failed to access BAR0: %s\n", strerror(-ret));
- exit(EXIT_FAILURE);
- }
+ t = time(NULL) + 1;
+ access_bar0(sock, irq_fd, &t);
+
+ /* FIXME check that above took at least 1s */
handle_dma_io(sock, dma_regions, nr_dma_regions, dma_region_fds);
@@ -796,20 +907,9 @@ int main(int argc, char *argv[])
strerror(-ret));
}
- /*
- * FIXME now that region read/write works, change the server implementation
- * to trigger an interrupt after N seconds, where N is the value written to
- * BAR0 by the client.
- */
-
/* BAR1 can be memory mapped and read directly */
/*
- * TODO implement the following: write a value in BAR1, a server timer will
- * increase it every second (SIGALARM)
- */
-
- /*
* XXX VFIO_USER_DMA_UNMAP
*
* unmap the first group of the DMA regions
@@ -821,10 +921,68 @@ int main(int argc, char *argv[])
errx(EXIT_FAILURE, "failed to unmap DMA regions: %s", strerror(-ret));
}
- if (migration == MIGRATION_SOURCE) {
- ret = migrate_from(sock);
+ /*
+ * Schedule an interrupt in 2 seconds from now in the old server and then
+ * immediately migrate the device. The new server should deliver the
+ * interrupt. Hopefully 2 seconds should be enough for migration to finish.
+ * TODO make this value a command line option.
+ */
+ t = time(NULL) + 2;
+ ret = access_region(sock, LM_DEV_BAR0_REG_IDX, true, 0, &t, sizeof t);
+ if (ret < 0) {
+ errx(EXIT_FAILURE, "failed to write to BAR0: %s", strerror(-ret));
+ }
+
+ /*
+ * By sleeping here for 1s after migration finishes on the source server
+ * (but not yet started on the destination server), the timer should be
+ * armed on the destination server for 2-1=1 seconds. If we don't sleep
+ * then it will be armed for 2 seconds, which isn't as interesting.
+ */
+ sleep(1);
+
+ migrate_from(sock, &migr_data, &migr_data_len);
+
+ /*
+ * Normally the client would now send the device state to the destination
+ * client and then exit. We don't demonstrate how this works as this is a
+ * client implementation detail. Instead, the client starts the destination
+ * server and then applies the migration data.
+ */
+
+ sock = migrate_to(argv[optind], client_max_fds, &server_max_fds, &pgsize,
+ migr_data, migr_data_len);
+
+ /*
+ * Now we must reconfigure the destination server.
+ */
+
+ /*
+ * XXX reconfigure DMA regions, note that the first group of the DMA
+ * regions has been unmapped.
+ */
+ map_dma_regions(sock, server_max_fds, dma_regions + server_max_fds,
+ dma_region_fds + server_max_fds,
+ nr_dma_regions - server_max_fds);
+
+ /*
+ * XXX reconfigure IRQs.
+ * FIXME is this something the client needs to do? I would expect so since
+ * it's the client that creates and provides the FD. Do we need to save some
+ * state in the migration data?
+ */
+ ret = configure_irqs(sock);
+ if (ret < 0) {
+ errx(EXIT_FAILURE, "failed to configure IRQs on destination server: %s",
+ strerror(-ret));
}
+ irq_fd = ret;
+
+ wait_for_irqs(sock, irq_fd);
+ handle_dma_io(sock, dma_regions + server_max_fds,
+ nr_dma_regions - server_max_fds,
+ dma_region_fds + server_max_fds);
return 0;
}
diff --git a/samples/server.c b/samples/server.c
index 92f3312..5dcda46 100644
--- a/samples/server.c
+++ b/samples/server.c
@@ -59,9 +59,11 @@ struct server_data {
uint8_t *bar1;
struct dma_regions regions[NR_DMA_REGIONS];
struct {
- int fake_internal_state;
__u64 pending_bytes;
__u64 data_size;
+ void *migr_data;
+ size_t migr_data_len;
+ lm_migr_state_t state;
} migration;
};
@@ -71,7 +73,19 @@ _log(UNUSED void *pvt, UNUSED lm_log_lvl_t lvl, char const *msg)
fprintf(stderr, "server: %s\n", msg);
}
-/* returns time in seconds since Epoch */
+/*
+ * Arms the real-time interval timer (ITIMER_REAL) to expire at absolute time
+ * @t, expressed in seconds since the Epoch.
+ *
+ * If @t is not in the future the timer is armed to expire as soon as
+ * possible: a zero value would disarm the timer and a negative one is
+ * rejected by setitimer().
+ *
+ * Returns 0 on success and -errno if the timer could not be armed.
+ */
+static int
+arm_timer(struct server_data *server_data, time_t t)
+{
+    struct itimerval new = {.it_value.tv_sec = t - time(NULL) };
+
+    if (new.it_value.tv_sec <= 0) {
+        /* deadline already reached, fire on the next possible occasion */
+        new.it_value.tv_sec = 0;
+        new.it_value.tv_usec = 1;
+    }
+    /* tv_sec is a time_t, cast it so that it matches the conversion */
+    lm_log(server_data->lm_ctx, LM_DBG,
+           "arming timer to trigger in %ld seconds",
+           (long)new.it_value.tv_sec);
+    if (setitimer(ITIMER_REAL, &new, NULL) != 0) {
+        /* save errno before logging, which might overwrite it */
+        int saved_errno = errno;
+
+        lm_log(server_data->lm_ctx, LM_ERR, "failed to arm timer: %m");
+        return -saved_errno;
+    }
+    return 0;
+}
+
ssize_t
bar0_access(void *pvt, char * const buf, size_t count, loff_t offset,
const bool is_write)
@@ -79,17 +93,18 @@ bar0_access(void *pvt, char * const buf, size_t count, loff_t offset,
struct server_data *server_data = pvt;
if (count != sizeof(time_t) || offset != 0) {
+ lm_log(server_data->lm_ctx, LM_ERR, "bad BAR0 access %#lx-%#lx",
+ offset, offset + count - 1);
errno = EINVAL;
return -1;
}
if (is_write) {
- struct itimerval new = {.it_value.tv_sec = *(time_t*)buf};
- lm_log(server_data->lm_ctx, LM_DBG,
- "arming timer to trigger in %d seconds", new.it_value.tv_sec);
- if (setitimer(ITIMER_REAL, &new, NULL) != 0) {
- lm_log(server_data->lm_ctx, LM_ERR, "failed to arm timer: %m");
- return -1;
+ if (server_data->migration.state == LM_MIGR_STATE_RUNNING) {
+ int ret = arm_timer(server_data, *(time_t*)buf);
+ if (ret < 0) {
+ return ret;
+ }
}
memcpy(&server_data->bar0, buf, count);
} else {
@@ -230,6 +245,7 @@ static int device_reset(UNUSED void *pvt)
static int
migration_device_state_transition(void *pvt, lm_migr_state_t state)
{
+ int ret;
struct server_data *server_data = pvt;
printf("migration: transition to device state %d\n", state);
@@ -242,9 +258,18 @@ migration_device_state_transition(void *pvt, lm_migr_state_t state)
case LM_MIGR_STATE_STOP:
assert(server_data->migration.pending_bytes == 0);
break;
+ case LM_MIGR_STATE_RESUME:
+ break;
+ case LM_MIGR_STATE_RUNNING:
+ ret = arm_timer(server_data, server_data->bar0);
+ if (ret < 0) {
+ return ret;
+ }
+ break;
default:
assert(false); /* FIXME */
}
+ server_data->migration.state = state;
return 0;
}
@@ -265,27 +290,76 @@ migration_prepare_data(void *pvt, __u64 *offset, __u64 *size)
struct server_data *server_data = pvt;
*offset = 0;
- *size = server_data->migration.data_size = MIN(server_data->migration.pending_bytes, sysconf(_SC_PAGESIZE) / 4);
+ *size = server_data->migration.data_size = MIN(server_data->migration.pending_bytes, server_data->migration.migr_data_len / 4);
return 0;
}
static size_t
-migration_read_data(void *pvt, UNUSED void *buf, __u64 size,
- UNUSED __u64 offset)
+migration_read_data(void *pvt, void *buf, __u64 size, __u64 offset)
{
struct server_data *server_data = pvt;
if (server_data->migration.data_size < size) {
- assert(false);
+ lm_log(server_data->lm_ctx, LM_ERR,
+ "invalid migration data read %#lx-%#lx",
+ offset, offset + size - 1);
+ return -EINVAL;
}
- return 0;
+ /* FIXME implement, client should be able to write any byte range */
+ assert((offset == 0 && size >= sizeof server_data->bar0)
+ || offset >= sizeof server_data->bar0);
+
+ if (offset == 0 && size >= sizeof server_data->bar0) {
+ memcpy(buf, &server_data->bar0, sizeof server_data->bar0);
+ }
+ return size;
}
+/*
+ * Migration callback: stores @size bytes of migration data written by the
+ * client at @offset within the server's migration data buffer.
+ *
+ * Returns 0 on success. If the range does not fit in the buffer nothing is
+ * copied and -EINVAL is returned (converted to size_t, the same way
+ * migration_read_data reports errors).
+ */
static size_t
-migration_write_data(UNUSED void *pvt, UNUSED void *data, UNUSED __u64 size)
+migration_write_data(void *pvt, void *data, __u64 size, __u64 offset)
{
- assert(false);
+    struct server_data *server_data = pvt;
+
+    assert(server_data != NULL);
+    assert(data != NULL);
+
+    /* phrased so that offset + size cannot wrap around */
+    if (size > server_data->migration.migr_data_len
+        || offset > server_data->migration.migr_data_len - size) {
+        lm_log(server_data->lm_ctx, LM_ERR,
+               "invalid write %#llx-%#llx", offset, offset + size - 1);
+        /* must not fall through: the copy below would overflow the buffer */
+        return -EINVAL;
+    }
+
+    /* migr_data is a void pointer, do the arithmetic on bytes */
+    memcpy((char *)server_data->migration.migr_data + offset, data, size);
+
+    return 0;
+}
+
+
+/*
+ * Migration callback: notifies the server that @count bytes of migration data
+ * starting at @offset have been written to the migration data buffer.
+ *
+ * When the written range starts at offset zero and covers at least BAR0, the
+ * saved BAR0 value is applied to the device through bar0_access().
+ *
+ * Returns 0 on success, -EINVAL if the range lies outside the migration data
+ * buffer, or the negative value returned by bar0_access().
+ */
+static int
+migration_data_written(void *pvt, __u64 count, __u64 offset)
+{
+    int ret;
+    struct server_data *server_data = pvt;
+
+    assert(server_data != NULL);
+
+    /* phrased so that offset + count cannot wrap around */
+    if (count > server_data->migration.migr_data_len
+        || offset > server_data->migration.migr_data_len - count) {
+        lm_log(server_data->lm_ctx, LM_ERR,
+               "bad migration data range %#llx-%#llx",
+               offset, offset + count - 1);
+        return -EINVAL;
+    }
+
+    if (offset == 0 && count >= sizeof server_data->bar0) {
+
+        /*
+         * Apply device state. bar0_access() arms the timer only while the
+         * migration state is running, otherwise it just stores the value; the
+         * transition to the running state then arms the timer from BAR0.
+         */
+        ret = bar0_access(pvt, server_data->migration.migr_data,
+                          sizeof server_data->bar0, 0, true);
+        if (ret < 0) {
+            return ret;
+        }
+    }
return 0;
}
@@ -295,7 +369,13 @@ int main(int argc, char *argv[]){
bool verbose = false;
char opt;
struct sigaction act = {.sa_handler = _sa_handler};
- struct server_data server_data = {0};
+ struct server_data server_data = {
+ .migration = {
+ /* one page so that we can memory map it */
+ .migr_data_len = sysconf(_SC_PAGESIZE),
+ .state = LM_MIGR_STATE_RUNNING
+ }
+ };
int nr_sparse_areas = 2, size = 1024, i;
struct lm_sparse_mmap_areas *sparse_areas;
lm_ctx_t *lm_ctx;
@@ -349,7 +429,12 @@ int main(int argc, char *argv[]){
},
.reg_info[LM_DEV_MIGRATION_REG_IDX] = { /* migration region */
.flags = LM_REG_FLAG_RW,
- .size = sizeof(struct vfio_device_migration_info) + sysconf(_SC_PAGESIZE),
+ /*
+ * FIXME don't declare support for migration via a region, this
+ * is a VFIO artifact, make it something different. We still
+ * have to make the migration data memory mappable.
+ */
+ .size = sizeof(struct vfio_device_migration_info) + server_data.migration.migr_data_len,
.mmap_areas = sparse_areas,
},
.irq_count[LM_DEV_INTX_IRQ_IDX] = 1,
@@ -364,6 +449,7 @@ int main(int argc, char *argv[]){
.get_pending_bytes = &migration_get_pending_bytes,
.prepare_data = &migration_prepare_data,
.read_data = &migration_read_data,
+ .data_written = &migration_data_written,
.write_data = &migration_write_data
}
};
@@ -378,6 +464,12 @@ int main(int argc, char *argv[]){
err(EXIT_FAILURE, "failed to initialize device emulation\n");
}
+ server_data.migration.migr_data = aligned_alloc(server_data.migration.migr_data_len,
+ server_data.migration.migr_data_len);
+ if (server_data.migration.migr_data == NULL) {
+ errx(EXIT_FAILURE, "failed to allocate migration data");
+ }
+
do {
ret = lm_ctx_drive(lm_ctx);
if (ret == -EINTR) {