aboutsummaryrefslogtreecommitdiff
path: root/samples/server.c
diff options
context:
space:
mode:
authorThanos Makatos <thanos.makatos@nutanix.com>2021-01-25 17:39:35 +0000
committerGitHub <noreply@github.com>2021-01-25 17:39:35 +0000
commitf2fe9d8e4fc4c5eb80dc61cc243a18087cc12ca3 (patch)
tree06c6a12d428fbfadbcccf0dea780c393f076f6ca /samples/server.c
parent915a5b01f34a61cacef5ae5b50a215d13628b8b0 (diff)
downloadlibvfio-user-f2fe9d8e4fc4c5eb80dc61cc243a18087cc12ca3.zip
libvfio-user-f2fe9d8e4fc4c5eb80dc61cc243a18087cc12ca3.tar.gz
libvfio-user-f2fe9d8e4fc4c5eb80dc61cc243a18087cc12ca3.tar.bz2
add pre-copy phase in live migration example (#247)
This patch adds a simplistic pre-copy phase in the live migration sample. The end goal is to have a separate thread in the client to modify device state while the device is in the pre-copy phase. This will be done in later patches. BAR1 is saved during the pre-copy phase and BAR0 is saved during the stop-and-copy phase. This is purely for convenience. There are quite a few assumptions and FIXMEs, even in the client code. We plan to address them in future patches. Signed-off-by: Thanos Makatos <thanos.makatos@nutanix.com>
Diffstat (limited to 'samples/server.c')
-rw-r--r--samples/server.c157
1 file changed, 104 insertions, 53 deletions
diff --git a/samples/server.c b/samples/server.c
index 4c22922..f7a1cd1 100644
--- a/samples/server.c
+++ b/samples/server.c
@@ -58,7 +58,8 @@ struct dma_regions {
struct server_data {
time_t bar0;
- uint8_t *bar1;
+ void *bar1;
+ size_t bar1_size;
struct dma_regions regions[NR_DMA_REGIONS];
struct {
__u64 pending_bytes;
@@ -118,15 +119,26 @@ bar0_access(vfu_ctx_t *vfu_ctx, char * const buf, size_t count, loff_t offset,
}
ssize_t
-bar1_access(vfu_ctx_t *vfu_ctx UNUSED, UNUSED char * const buf,
- UNUSED size_t count, UNUSED loff_t offset,
- UNUSED const bool is_write)
+bar1_access(vfu_ctx_t *vfu_ctx, char * const buf,
+ size_t count, loff_t offset,
+ const bool is_write)
{
- assert(false);
+ struct server_data *server_data = vfu_get_private(vfu_ctx);
+
+ if (offset + count > server_data->bar1_size) {
+ vfu_log(vfu_ctx, LOG_ERR, "bad BAR1 access %#lx-%#lx",
+ offset, offset + count - 1);
+ errno = EINVAL;
+ return -1;
+ }
- /* FIXME assert that only the 2nd page is accessed */
+ if (is_write) {
+ memcpy(server_data->bar1 + offset, buf, count);
+ } else {
+ memcpy(buf, server_data->bar1, count);
+ }
- return -ENOTSUP;
+ return count;
}
bool irq_triggered = false;
@@ -253,8 +265,12 @@ migration_device_state_transition(vfu_ctx_t *vfu_ctx, vfu_migr_state_t state)
switch (state) {
case VFU_MIGR_STATE_STOP_AND_COPY:
+ server_data->migration.pending_bytes = sizeof(time_t); /* FIXME BAR0 region size */
+ server_data->migration.data_size = 0;
+ break;
+ case VFU_MIGR_STATE_PRE_COPY:
/* TODO must be less than size of data region in migration region */
- server_data->migration.pending_bytes = sysconf(_SC_PAGESIZE);
+ server_data->migration.pending_bytes = server_data->bar1_size;
break;
case VFU_MIGR_STATE_STOP:
assert(server_data->migration.pending_bytes == 0);
@@ -291,6 +307,10 @@ migration_prepare_data(vfu_ctx_t *vfu_ctx, __u64 *offset, __u64 *size)
struct server_data *server_data = vfu_get_private(vfu_ctx);
*offset = 0;
+ /*
+ * Don't provide all migration data in one go in order to make it a bit
+ * more interesting.
+ */
*size = server_data->migration.data_size = MIN(server_data->migration.pending_bytes, server_data->migration.migr_data_len / 4);
return 0;
}
@@ -299,6 +319,8 @@ static size_t
migration_read_data(vfu_ctx_t *vfu_ctx, void *buf, __u64 size, __u64 offset)
{
struct server_data *server_data = vfu_get_private(vfu_ctx);
+ uint8_t *p;
+ size_t bar_size;
if (server_data->migration.data_size < size) {
vfu_log(vfu_ctx, LOG_ERR, "invalid migration data read %#llx-%#llx",
@@ -306,13 +328,38 @@ migration_read_data(vfu_ctx_t *vfu_ctx, void *buf, __u64 size, __u64 offset)
return -EINVAL;
}
- /* FIXME implement, client should be able to write any byte range */
- assert((offset == 0 && size >= sizeof server_data->bar0)
- || offset >= sizeof server_data->bar0);
+ /*
+ * If in pre-copy state we copy BAR1, if in stop-and-copy state we copy
+ * BAR0. This behavior is purely an artifact of this server implementation
+ * simply to make it as simple as possible. Note that the client might go
+ * from state running to stop-and-copy, completely skipping the pre-copy
+ * state. This is legitimate but we don't support it for now.
+ *
+ * FIXME implement transitioning from the running state straight to the
+ * stop-and-copy state.
+ */
- if (offset == 0 && size >= sizeof server_data->bar0) {
- memcpy(buf, &server_data->bar0, sizeof server_data->bar0);
+ if (server_data->migration.state == VFU_MIGR_STATE_PRE_COPY) {
+ p = server_data->bar1;
+ bar_size = server_data->bar1_size;
+ } else if (server_data->migration.state == VFU_MIGR_STATE_STOP_AND_COPY) {
+ p = (uint8_t*)&server_data->bar0;
+ bar_size = sizeof server_data->bar0;
+ } else {
+ /*
+ * Reading from the migration region in any other state is undefined
+ * (I think).
+ */
+ return 0;
}
+ if (offset > bar_size) {
+ errno = EINVAL;
+ return -1;
+ }
+ if (offset + size > bar_size) {
+ size = bar_size - offset;
+ }
+ memcpy(buf, p + offset, size);
return size;
}
@@ -324,41 +371,44 @@ migration_write_data(vfu_ctx_t *vfu_ctx, void *data, __u64 size, __u64 offset)
assert(server_data != NULL);
assert(data != NULL);
- if (offset + size > server_data->migration.migr_data_len) {
- vfu_log(vfu_ctx, LOG_ERR, "invalid write %#llx-%#llx",
- offset, offset + size - 1);
- }
+ /*
+ * During pre-copy state we save BAR1 and during stop-and-copy state we
+ * save BAR0.
+ */
+ vfu_log(vfu_ctx, LOG_DEBUG,
+ "apply device migration data to BAR%d %#llx-%#llx",
+ offset < server_data->bar1_size ? 1 : 0,
+ offset, offset + size - 1);
+
+ if (offset < server_data->bar1_size) {
+ assert(offset + size <= server_data->bar1_size); /* FIXME */
+ memcpy(server_data->bar1 + offset, data, size);
+ } else {
+ int ret;
- memcpy(server_data->migration.migr_data + offset, data, size);
+ /* FIXME should be able to write any valid subrange */
+ assert(offset - server_data->bar1_size == 0);
+ assert(size == sizeof server_data->bar0);
+
+ ret = bar0_access(vfu_ctx, data, sizeof server_data->bar0, 0, true);
+
+ assert(ret == (int)size); /* FIXME */
+ }
return 0;
}
static int
-migration_data_written(vfu_ctx_t *vfu_ctx, __u64 count, __u64 offset)
+migration_data_written(UNUSED vfu_ctx_t *vfu_ctx, UNUSED __u64 count,
+ UNUSED __u64 offset)
{
- struct server_data *server_data = vfu_get_private(vfu_ctx);
- int ret;
-
- assert(server_data != NULL);
-
- if (offset + count > server_data->migration.migr_data_len) {
- vfu_log(vfu_ctx, LOG_ERR, "bad migration data range %#llx-%#llx",
- offset, offset + count - 1);
- return -EINVAL;
- }
-
- if (offset == 0 && count >= sizeof server_data->bar0) {
-
- /* apply device state */
- /* FIXME must arm timer only after device is resumed!!! */
- ret = bar0_access(vfu_ctx, server_data->migration.migr_data,
- sizeof server_data->bar0, 0, true);
- if (ret < 0) {
- return ret;
- }
- }
+ /*
+ * We apply migration state directly in the migration_write_data callback,
+ * we don't need to do anything here. We would have to apply migration
+ * state in this callback if the migration region was memory mappable, in
+ * which case we wouldn't know when the client wrote migration data.
+ */
return 0;
}
@@ -369,10 +419,10 @@ int main(int argc, char *argv[])
bool verbose = false;
char opt;
struct sigaction act = {.sa_handler = _sa_handler};
+ size_t bar1_size = 0x3000;
struct server_data server_data = {
.migration = {
- /* one page so that we can memory map it */
- .migr_data_len = sysconf(_SC_PAGESIZE),
+ .migr_data_len = bar1_size + sizeof(time_t),
.state = VFU_MIGR_STATE_RUNNING
}
};
@@ -393,12 +443,6 @@ int main(int argc, char *argv[])
errx(EXIT_FAILURE, "missing vfio-user socket path");
}
- /* coverity[NEGATIVE_RETURNS] */
- server_data.bar1 = malloc(sysconf(_SC_PAGESIZE));
- if (server_data.bar1 == NULL) {
- err(EXIT_FAILURE, "BAR1");
- }
-
sigemptyset(&act.sa_mask);
if (sigaction(SIGALRM, &act, NULL) == -1) {
err(EXIT_FAILURE, "failed to register signal handler");
@@ -431,21 +475,29 @@ int main(int argc, char *argv[])
/*
* Setup BAR1 to be 3 pages in size where only the first and the last pages
- * are mappable.
+ * are mappable. The client can still mmap the 2nd page, we can't prohibit
+ * this under Linux. If we really want to prohibit it we have to use
+ * separate files for the same region.
*/
if ((fp = tmpfile()) == NULL) {
err(EXIT_FAILURE, "failed to create BAR1 file");
}
- if (ftruncate(fileno(fp), 0x3000) == -1) {
+ server_data.bar1_size = bar1_size;
+ if (ftruncate(fileno(fp), server_data.bar1_size) == -1) {
err(EXIT_FAILURE, "failed to truncate BAR1 file");
}
+ server_data.bar1 = mmap(NULL, server_data.bar1_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fileno(fp), 0);
+ if (server_data.bar1 == MAP_FAILED) {
+ err(EXIT_FAILURE, "failed to mmap BAR1");
+ }
struct iovec mmap_areas[] = {
{ .iov_base = (void*)0, .iov_len = 0x1000 },
{ .iov_base = (void*)0x2000, .iov_len = 0x1000 }
};
ret = vfu_setup_region(vfu_ctx, VFU_PCI_DEV_BAR1_REGION_IDX,
- 0x3000, &bar1_access, VFU_REGION_FLAG_RW,
- mmap_areas, 2, fileno(fp));
+ server_data.bar1_size, &bar1_access,
+ VFU_REGION_FLAG_RW, mmap_areas, 2, fileno(fp));
if (ret < 0) {
err(EXIT_FAILURE, "failed to setup BAR1 region");
}
@@ -525,7 +577,6 @@ int main(int argc, char *argv[])
vfu_destroy_ctx(vfu_ctx);
free(server_data.migration.migr_data);
- free(server_data.bar1);
return EXIT_SUCCESS;
}