aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS1
-rw-r--r--include/migration/register.h15
-rw-r--r--migration/migration.c164
-rw-r--r--migration/multifd.c5
-rw-r--r--migration/multifd.h5
-rw-r--r--migration/options.c30
-rw-r--r--migration/options.h1
-rw-r--r--migration/ram.c168
-rw-r--r--migration/rdma.c191
-rw-r--r--migration/rdma.h3
-rw-r--r--migration/savevm.c33
-rw-r--r--migration/savevm.h1
-rwxr-xr-xscripts/rdma-migration-helper.sh70
-rwxr-xr-xscripts/vmstate-static-checker.py1
-rw-r--r--tests/qtest/migration/compression-tests.c22
-rw-r--r--tests/qtest/migration/cpr-tests.c6
-rw-r--r--tests/qtest/migration/file-tests.c58
-rw-r--r--tests/qtest/migration/framework.c75
-rw-r--r--tests/qtest/migration/framework.h9
-rw-r--r--tests/qtest/migration/misc-tests.c4
-rw-r--r--tests/qtest/migration/postcopy-tests.c8
-rw-r--r--tests/qtest/migration/precopy-tests.c91
-rw-r--r--tests/qtest/migration/tls-tests.c23
23 files changed, 614 insertions, 370 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index b3f9f26..8cd9626 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3538,6 +3538,7 @@ R: Li Zhijian <lizhijian@fujitsu.com>
R: Peter Xu <peterx@redhat.com>
S: Odd Fixes
F: migration/rdma*
+F: scripts/rdma-migration-helper.sh
Migration dirty limit and dirty page rate
M: Hyman Huang <yong.huang@smartx.com>
diff --git a/include/migration/register.h b/include/migration/register.h
index c041ce3..b79dc81 100644
--- a/include/migration/register.h
+++ b/include/migration/register.h
@@ -190,6 +190,21 @@ typedef struct SaveVMHandlers {
/* This runs outside the BQL! */
/**
+ * @save_postcopy_prepare
+ *
+ * This hook will be invoked on the source side right before switching
+ * to postcopy (before VM stopped).
+ *
+ * @f: QEMUFile where to send the data
+ * @opaque: Data pointer passed to register_savevm_live()
+ * @errp: Error** used to report error message
+ *
+ * Returns: true if succeeded, false if error occurred. When false is
+ * returned, @errp must be set.
+ */
+ bool (*save_postcopy_prepare)(QEMUFile *f, void *opaque, Error **errp);
+
+ /**
* @state_pending_estimate
*
* This estimates the remaining data to transfer
diff --git a/migration/migration.c b/migration/migration.c
index 55ec4bf..4697732 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -95,6 +95,9 @@ enum mig_rp_message_type {
MIG_RP_MSG_MAX
};
+/*
+ * Migration channel types, as classified on the incoming side by
+ * migration_ioc_process_incoming():
+ *
+ * CH_MAIN: the main migration channel (identified by
+ * QEMU_VM_FILE_MAGIC when the channel can be peeked)
+ * CH_MULTIFD: a multifd channel (identified by MULTIFD_MAGIC when the
+ * channel can be peeked)
+ * CH_POSTCOPY: the postcopy preempt channel, which is only expected once
+ * the main and all multifd channels are established
+ */
+enum { CH_MAIN, CH_MULTIFD, CH_POSTCOPY };
+
/* When we add fault tolerance, we could have several
migrations at once. For now we don't need to add
dynamic creation of migration */
@@ -259,6 +262,24 @@ migration_channels_and_transport_compatible(MigrationAddress *addr,
return true;
}
+/*
+ * Check that the enabled migration capabilities can be used with the
+ * transport selected by @addr.  Only the RDMA transport is restricted
+ * here; every other transport is accepted unconditionally.
+ *
+ * Returns: true if compatible; otherwise false, with the reason reported
+ * through @errp by migrate_rdma_caps_check().
+ */
+static bool
+migration_capabilities_and_transport_compatible(MigrationAddress *addr,
+                                                Error **errp)
+{
+    if (addr->transport != MIGRATION_ADDRESS_TYPE_RDMA) {
+        return true;
+    }
+
+    return migrate_rdma_caps_check(migrate_get_current()->capabilities, errp);
+}
+
+/*
+ * Return true only if both the channel layout and the enabled
+ * capabilities are usable with the transport in @addr.  The capability
+ * check is not run when the channel check has already failed.
+ */
+static bool migration_transport_compatible(MigrationAddress *addr, Error **errp)
+{
+    if (!migration_channels_and_transport_compatible(addr, errp)) {
+        return false;
+    }
+
+    return migration_capabilities_and_transport_compatible(addr, errp);
+}
+
static gint page_request_addr_cmp(gconstpointer ap, gconstpointer bp)
{
uintptr_t a = (uintptr_t) ap, b = (uintptr_t) bp;
@@ -750,7 +771,7 @@ static void qemu_start_incoming_migration(const char *uri, bool has_channels,
}
/* transport mechanism not suitable for migration? */
- if (!migration_channels_and_transport_compatible(addr, errp)) {
+ if (!migration_transport_compatible(addr, errp)) {
return;
}
@@ -769,14 +790,6 @@ static void qemu_start_incoming_migration(const char *uri, bool has_channels,
}
#ifdef CONFIG_RDMA
} else if (addr->transport == MIGRATION_ADDRESS_TYPE_RDMA) {
- if (migrate_xbzrle()) {
- error_setg(errp, "RDMA and XBZRLE can't be used together");
- return;
- }
- if (migrate_multifd()) {
- error_setg(errp, "RDMA and multifd can't be used together");
- return;
- }
rdma_start_incoming_migration(&addr->u.rdma, errp);
#endif
} else if (addr->transport == MIGRATION_ADDRESS_TYPE_EXEC) {
@@ -931,9 +944,8 @@ static void migration_incoming_setup(QEMUFile *f)
{
MigrationIncomingState *mis = migration_incoming_get_current();
- if (!mis->from_src_file) {
- mis->from_src_file = f;
- }
+ assert(!mis->from_src_file);
+ mis->from_src_file = f;
qemu_file_set_blocking(f, false);
}
@@ -985,28 +997,19 @@ void migration_fd_process_incoming(QEMUFile *f)
migration_incoming_process();
}
-/*
- * Returns true when we want to start a new incoming migration process,
- * false otherwise.
- */
-static bool migration_should_start_incoming(bool main_channel)
+static bool migration_has_main_and_multifd_channels(void)
{
- /* Multifd doesn't start unless all channels are established */
- if (migrate_multifd()) {
- return migration_has_all_channels();
+ MigrationIncomingState *mis = migration_incoming_get_current();
+ if (!mis->from_src_file) {
+ /* main channel not established */
+ return false;
}
- /* Preempt channel only starts when the main channel is created */
- if (migrate_postcopy_preempt()) {
- return main_channel;
+ if (migrate_multifd() && !multifd_recv_all_channels_created()) {
+ return false;
}
- /*
- * For all the rest types of migration, we should only reach here when
- * it's the main channel that's being created, and we should always
- * proceed with this channel.
- */
- assert(main_channel);
+ /* main and all multifd channels are established */
return true;
}
@@ -1015,59 +1018,81 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
MigrationIncomingState *mis = migration_incoming_get_current();
Error *local_err = NULL;
QEMUFile *f;
- bool default_channel = true;
+ uint8_t channel;
uint32_t channel_magic = 0;
int ret = 0;
- if (migrate_multifd() && !migrate_mapped_ram() &&
- !migrate_postcopy_ram() &&
- qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) {
- /*
- * With multiple channels, it is possible that we receive channels
- * out of order on destination side, causing incorrect mapping of
- * source channels on destination side. Check channel MAGIC to
- * decide type of channel. Please note this is best effort, postcopy
- * preempt channel does not send any magic number so avoid it for
- * postcopy live migration. Also tls live migration already does
- * tls handshake while initializing main channel so with tls this
- * issue is not possible.
- */
- ret = migration_channel_read_peek(ioc, (void *)&channel_magic,
- sizeof(channel_magic), errp);
+ if (!migration_has_main_and_multifd_channels()) {
+ if (qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) {
+ /*
+ * With multiple channels, it is possible that we receive channels
+ * out of order on destination side, causing incorrect mapping of
+ * source channels on destination side. Check channel MAGIC to
+ * decide type of channel. Please note this is best effort,
+ * postcopy preempt channel does not send any magic number so
+ * avoid it for postcopy live migration. Also tls live migration
+ * already does tls handshake while initializing main channel so
+ * with tls this issue is not possible.
+ */
+ ret = migration_channel_read_peek(ioc, (void *)&channel_magic,
+ sizeof(channel_magic), errp);
+ if (ret != 0) {
+ return;
+ }
- if (ret != 0) {
+ channel_magic = be32_to_cpu(channel_magic);
+ if (channel_magic == QEMU_VM_FILE_MAGIC) {
+ channel = CH_MAIN;
+ } else if (channel_magic == MULTIFD_MAGIC) {
+ assert(migrate_multifd());
+ channel = CH_MULTIFD;
+ } else if (!mis->from_src_file &&
+ mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
+ /* reconnect main channel for postcopy recovery */
+ channel = CH_MAIN;
+ } else {
+ error_setg(errp, "unknown channel magic: %u", channel_magic);
+ return;
+ }
+ } else if (mis->from_src_file && migrate_multifd()) {
+ /*
+ * Non-peekable channels like tls/file are processed as
+ * multifd channels when multifd is enabled.
+ */
+ channel = CH_MULTIFD;
+ } else if (!mis->from_src_file) {
+ channel = CH_MAIN;
+ } else {
+ error_setg(errp, "non-peekable channel used without multifd");
return;
}
-
- default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC));
} else {
- default_channel = !mis->from_src_file;
+ assert(migrate_postcopy_preempt());
+ channel = CH_POSTCOPY;
}
if (multifd_recv_setup(errp) != 0) {
return;
}
- if (default_channel) {
+ if (channel == CH_MAIN) {
f = qemu_file_new_input(ioc);
migration_incoming_setup(f);
- } else {
+ } else if (channel == CH_MULTIFD) {
/* Multiple connections */
- assert(migration_needs_multiple_sockets());
- if (migrate_multifd()) {
- multifd_recv_new_channel(ioc, &local_err);
- } else {
- assert(migrate_postcopy_preempt());
- f = qemu_file_new_input(ioc);
- postcopy_preempt_new_channel(mis, f);
- }
+ multifd_recv_new_channel(ioc, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
+ } else if (channel == CH_POSTCOPY) {
+ assert(!mis->postcopy_qemufile_dst);
+ f = qemu_file_new_input(ioc);
+ postcopy_preempt_new_channel(mis, f);
+ return;
}
- if (migration_should_start_incoming(default_channel)) {
+ if (migration_has_main_and_multifd_channels()) {
/* If it's a recovery, we're done */
if (postcopy_try_recover()) {
return;
@@ -1084,18 +1109,13 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
*/
bool migration_has_all_channels(void)
{
- MigrationIncomingState *mis = migration_incoming_get_current();
-
- if (!mis->from_src_file) {
+ if (!migration_has_main_and_multifd_channels()) {
return false;
}
- if (migrate_multifd()) {
- return multifd_recv_all_channels_created();
- }
-
- if (migrate_postcopy_preempt()) {
- return mis->postcopy_qemufile_dst != NULL;
+ MigrationIncomingState *mis = migration_incoming_get_current();
+ if (migrate_postcopy_preempt() && !mis->postcopy_qemufile_dst) {
+ return false;
}
return true;
@@ -2208,7 +2228,7 @@ void qmp_migrate(const char *uri, bool has_channels,
}
/* transport mechanism not suitable for migration? */
- if (!migration_channels_and_transport_compatible(addr, errp)) {
+ if (!migration_transport_compatible(addr, errp)) {
return;
}
@@ -2707,6 +2727,10 @@ static int postcopy_start(MigrationState *ms, Error **errp)
}
}
+ if (!qemu_savevm_state_postcopy_prepare(ms->to_dst_file, errp)) {
+ return -1;
+ }
+
trace_postcopy_start();
bql_lock();
trace_postcopy_start_set_run();
diff --git a/migration/multifd.c b/migration/multifd.c
index 86c83e4..ec108af 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -36,11 +36,6 @@
#include "io/channel-socket.h"
#include "yank_functions.h"
-/* Multiple fd's */
-
-#define MULTIFD_MAGIC 0x11223344U
-#define MULTIFD_VERSION 1
-
typedef struct {
uint32_t magic;
uint32_t version;
diff --git a/migration/multifd.h b/migration/multifd.h
index 2d337e7..9b6d81e 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -49,6 +49,11 @@ bool multifd_queue_page(RAMBlock *block, ram_addr_t offset);
bool multifd_recv(void);
MultiFDRecvData *multifd_get_recv_data(void);
+/* Multiple fd's */
+
+#define MULTIFD_MAGIC 0x11223344U
+#define MULTIFD_VERSION 1
+
/* Multifd Compression flags */
#define MULTIFD_FLAG_SYNC (1 << 0)
diff --git a/migration/options.c b/migration/options.c
index b0ac2ea..b6ae953 100644
--- a/migration/options.c
+++ b/migration/options.c
@@ -448,6 +448,24 @@ static bool migrate_incoming_started(void)
return !!migration_incoming_get_current()->transport_data;
}
+/*
+ * Check a capability array against what the RDMA transport can be
+ * combined with.
+ *
+ * @caps: capability flags, indexed by MIGRATION_CAPABILITY_*
+ * @errp: set to describe the first conflicting capability found
+ *
+ * Returns: true if none of XBZRLE, multifd or postcopy-ram is enabled,
+ * false otherwise.  The capabilities are tested in that order.
+ */
+bool migrate_rdma_caps_check(bool *caps, Error **errp)
+{
+    const char *conflict = NULL;
+
+    if (caps[MIGRATION_CAPABILITY_XBZRLE]) {
+        conflict = "RDMA and XBZRLE can't be used together";
+    } else if (caps[MIGRATION_CAPABILITY_MULTIFD]) {
+        conflict = "RDMA and multifd can't be used together";
+    } else if (caps[MIGRATION_CAPABILITY_POSTCOPY_RAM]) {
+        conflict = "RDMA and postcopy-ram can't be used together";
+    }
+
+    if (conflict) {
+        error_setg(errp, "%s", conflict);
+        return false;
+    }
+
+    return true;
+}
+
/**
* @migration_caps_check - check capability compatibility
*
@@ -611,6 +629,13 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp)
}
}
+ /*
+ * On destination side, check the cases that capability is being set
+ * after incoming thread has started.
+ */
+ if (migrate_rdma() && !migrate_rdma_caps_check(new_caps, errp)) {
+ return false;
+ }
return true;
}
@@ -1193,6 +1218,11 @@ static void migrate_params_test_apply(MigrateSetParameters *params,
dest->tls_hostname = params->tls_hostname->u.s;
}
+ if (params->tls_authz) {
+ assert(params->tls_authz->type == QTYPE_QSTRING);
+ dest->tls_authz = params->tls_authz->u.s;
+ }
+
if (params->has_max_bandwidth) {
dest->max_bandwidth = params->max_bandwidth;
}
diff --git a/migration/options.h b/migration/options.h
index 762be4e..82d8397 100644
--- a/migration/options.h
+++ b/migration/options.h
@@ -57,6 +57,7 @@ bool migrate_tls(void);
/* capabilities helpers */
+bool migrate_rdma_caps_check(bool *caps, Error **errp);
bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp);
/* parameters */
diff --git a/migration/ram.c b/migration/ram.c
index cb8b2ed..e12913b 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -91,6 +91,36 @@
XBZRLECacheStats xbzrle_counters;
+/*
+ * This structure locates a specific location of a guest page. In QEMU,
+ * it's described in a tuple of (ramblock, offset).
+ */
+struct PageLocation {
+ /* RAMBlock the page belongs to */
+ RAMBlock *block;
+ /*
+ * Position of the page inside @block. ram_page_hint_update() fills it
+ * from PageSearchStatus.page and ram_page_hint_collect() hands it back
+ * as a page number, so it appears to be a page index rather than a
+ * byte offset -- TODO confirm.
+ */
+ unsigned long offset;
+};
+typedef struct PageLocation PageLocation;
+
+/**
+ * PageLocationHint: describes a hint to a page location
+ *
+ * @valid: set if the hint is valid and to be consumed
+ * @location: the hint content
+ *
+ * In postcopy preempt mode, the urgent channel may provide hints to the
+ * background channel, so that QEMU source can try to migrate whatever is
+ * right after the requested urgent pages.
+ *
+ * This is based on the assumption that the VM (already running on the
+ * destination side) tends to access the memory with spatial locality.
+ * This is also the default behavior of vanilla postcopy (preempt off).
+ *
+ * A new hint is only stored while @valid is false (see
+ * ram_page_hint_update()), and @valid is cleared again once the hint has
+ * been taken (see ram_page_hint_collect()).
+ */
+struct PageLocationHint {
+ bool valid;
+ PageLocation location;
+};
+typedef struct PageLocationHint PageLocationHint;
+
/* used by the search for pages to send */
struct PageSearchStatus {
/* The migration channel used for a specific host page */
@@ -395,6 +425,13 @@ struct RAMState {
* RAM migration.
*/
unsigned int postcopy_bmap_sync_requested;
+ /*
+ * Page hint during postcopy when preempt mode is on. Return path
+ * thread sets it, while background migration thread consumes it.
+ *
+ * Protected by @bitmap_mutex.
+ */
+ PageLocationHint page_hint;
};
typedef struct RAMState RAMState;
@@ -1144,32 +1181,6 @@ static int save_zero_page(RAMState *rs, PageSearchStatus *pss,
}
/*
- * @pages: the number of pages written by the control path,
- * < 0 - error
- * > 0 - number of pages written
- *
- * Return true if the pages has been saved, otherwise false is returned.
- */
-static bool control_save_page(PageSearchStatus *pss,
- ram_addr_t offset, int *pages)
-{
- int ret;
-
- ret = rdma_control_save_page(pss->pss_channel, pss->block->offset, offset,
- TARGET_PAGE_SIZE);
- if (ret == RAM_SAVE_CONTROL_NOT_SUPP) {
- return false;
- }
-
- if (ret == RAM_SAVE_CONTROL_DELAYED) {
- *pages = 1;
- return true;
- }
- *pages = ret;
- return true;
-}
-
-/*
* directly send the page to the stream
*
* Returns the number of pages written.
@@ -1965,7 +1976,13 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss)
int res;
/* Hand over to RDMA first */
- if (control_save_page(pss, offset, &res)) {
+ if (migrate_rdma()) {
+ res = rdma_control_save_page(pss->pss_channel, pss->block->offset,
+ offset, TARGET_PAGE_SIZE);
+
+ if (res == RAM_SAVE_CONTROL_DELAYED) {
+ res = 1;
+ }
return res;
}
@@ -2039,6 +2056,21 @@ static void pss_host_page_finish(PageSearchStatus *pss)
pss->host_page_start = pss->host_page_end = 0;
}
+/*
+ * Publish the position recorded in @pss as the page hint of @rs, unless
+ * an earlier hint is still waiting to be consumed, in which case the
+ * existing hint is kept untouched.
+ */
+static void ram_page_hint_update(RAMState *rs, PageSearchStatus *pss)
+{
+    PageLocationHint *pending = &rs->page_hint;
+
+    if (!pending->valid) {
+        /* No unconsumed hint: hand this one to the background stream */
+        pending->location.block = pss->block;
+        pending->location.offset = pss->page;
+        pending->valid = true;
+    }
+}
+
/*
* Send an urgent host page specified by `pss'. Need to be called with
* bitmap_mutex held.
@@ -2084,6 +2116,7 @@ out:
/* For urgent requests, flush immediately if sent */
if (sent) {
qemu_fflush(pss->pss_channel);
+ ram_page_hint_update(rs, pss);
}
return ret;
}
@@ -2171,6 +2204,30 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss)
return (res < 0 ? res : pages);
}
+/*
+ * Tell whether @rs holds a page hint that should be consumed.  Hints
+ * only exist while postcopy preempt mode is active, so anything stored
+ * outside of that mode is ignored.
+ */
+static bool ram_page_hint_valid(RAMState *rs)
+{
+    return postcopy_preempt_active() && rs->page_hint.valid;
+}
+
+/*
+ * Take the pending page hint out of @rs.
+ *
+ * @block: receives the hinted RAMBlock
+ * @page: receives the hinted page within that block
+ *
+ * The hint must be valid on entry; it is marked consumed on return.
+ */
+static void ram_page_hint_collect(RAMState *rs, RAMBlock **block,
+                                  unsigned long *page)
+{
+    PageLocationHint *pending = &rs->page_hint;
+    PageLocation *where = &pending->location;
+
+    assert(pending->valid);
+
+    *block = where->block;
+    *page = where->offset;
+
+    /* The hint has been handed out, do not use it a second time */
+    pending->valid = false;
+}
+
/**
* ram_find_and_save_block: finds a dirty page and sends it to f
*
@@ -2187,6 +2244,8 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss)
static int ram_find_and_save_block(RAMState *rs)
{
PageSearchStatus *pss = &rs->pss[RAM_CHANNEL_PRECOPY];
+ unsigned long next_page;
+ RAMBlock *next_block;
int pages = 0;
/* No dirty page as there is zero RAM */
@@ -2206,7 +2265,14 @@ static int ram_find_and_save_block(RAMState *rs)
rs->last_page = 0;
}
- pss_init(pss, rs->last_seen_block, rs->last_page);
+ if (ram_page_hint_valid(rs)) {
+ ram_page_hint_collect(rs, &next_block, &next_page);
+ } else {
+ next_block = rs->last_seen_block;
+ next_page = rs->last_page;
+ }
+
+ pss_init(pss, next_block, next_page);
while (true){
if (!get_queued_page(rs, pss)) {
@@ -2339,6 +2405,13 @@ static void ram_save_cleanup(void *opaque)
ram_state_cleanup(rsp);
}
+/* Drop any pending hint in @hint and clear the stored location */
+static void ram_page_hint_reset(PageLocationHint *hint)
+{
+    hint->valid = false;
+    hint->location = (PageLocation) { .block = NULL, .offset = 0 };
+}
+
static void ram_state_reset(RAMState *rs)
{
int i;
@@ -2351,6 +2424,8 @@ static void ram_state_reset(RAMState *rs)
rs->last_page = 0;
rs->last_version = ram_list.version;
rs->xbzrle_started = false;
+
+ ram_page_hint_reset(&rs->page_hint);
}
#define MAX_WAIT 50 /* ms, half buffered_file limit */
@@ -4418,6 +4493,42 @@ static int ram_resume_prepare(MigrationState *s, void *opaque)
return 0;
}
+/*
+ * save_postcopy_prepare hook for RAM.
+ *
+ * @f: QEMUFile where to send the data
+ * @opaque: unused here
+ * @errp: set when the multifd flush and sync fails
+ *
+ * Returns: true on success (RAM_SAVE_FLAG_EOS has been queued on @f),
+ * false on failure.
+ */
+static bool ram_save_postcopy_prepare(QEMUFile *f, void *opaque, Error **errp)
+{
+    /*
+     * With multifd enabled, every page queued before postcopy starts has
+     * to be flushed first, and has to be loaded on the destination before
+     * the switch to postcopy.  Loading guest pages in the multifd recv
+     * threads is (so far) non-atomic, so it must not happen while vCPUs
+     * are running on the destination.
+     *
+     * Doing the flush and sync here guarantees that ordering: it runs
+     * before the VM stops and before the source sends the rest of the
+     * postcopy messages.  By the time the destination sees those
+     * messages it has already seen the sync message on the main channel
+     * (either RAM_SAVE_FLAG_MULTIFD_FLUSH or RAM_SAVE_FLAG_EOS), which in
+     * turn guarantees that all guest pages previously queued in the
+     * multifd channels are completely loaded.
+     */
+    if (migrate_multifd() && multifd_ram_flush_and_sync(f) < 0) {
+        error_setg(errp, "%s: multifd flush and sync failed", __func__);
+        return false;
+    }
+
+    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
+
+    return true;
+}
+
void postcopy_preempt_shutdown_file(MigrationState *s)
{
qemu_put_be64(s->postcopy_qemufile_src, RAM_SAVE_FLAG_EOS);
@@ -4437,6 +4548,7 @@ static SaveVMHandlers savevm_ram_handlers = {
.load_setup = ram_load_setup,
.load_cleanup = ram_load_cleanup,
.resume_prepare = ram_resume_prepare,
+ .save_postcopy_prepare = ram_save_postcopy_prepare,
};
static void ram_mig_ram_block_resized(RAMBlockNotifier *n, void *host,
diff --git a/migration/rdma.c b/migration/rdma.c
index b31652b..2d839fc 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -768,156 +768,12 @@ static void qemu_rdma_dump_gid(const char *who, struct rdma_cm_id *id)
}
/*
- * As of now, IPv6 over RoCE / iWARP is not supported by linux.
- * We will try the next addrinfo struct, and fail if there are
- * no other valid addresses to bind against.
- *
- * If user is listening on '[::]', then we will not have a opened a device
- * yet and have no way of verifying if the device is RoCE or not.
- *
- * In this case, the source VM will throw an error for ALL types of
- * connections (both IPv4 and IPv6) if the destination machine does not have
- * a regular infiniband network available for use.
- *
- * The only way to guarantee that an error is thrown for broken kernels is
- * for the management software to choose a *specific* interface at bind time
- * and validate what time of hardware it is.
- *
- * Unfortunately, this puts the user in a fix:
- *
- * If the source VM connects with an IPv4 address without knowing that the
- * destination has bound to '[::]' the migration will unconditionally fail
- * unless the management software is explicitly listening on the IPv4
- * address while using a RoCE-based device.
- *
- * If the source VM connects with an IPv6 address, then we're OK because we can
- * throw an error on the source (and similarly on the destination).
- *
- * But in mixed environments, this will be broken for a while until it is fixed
- * inside linux.
- *
- * We do provide a *tiny* bit of help in this function: We can list all of the
- * devices in the system and check to see if all the devices are RoCE or
- * Infiniband.
- *
- * If we detect that we have a *pure* RoCE environment, then we can safely
- * thrown an error even if the management software has specified '[::]' as the
- * bind address.
- *
- * However, if there is are multiple hetergeneous devices, then we cannot make
- * this assumption and the user just has to be sure they know what they are
- * doing.
- *
- * Patches are being reviewed on linux-rdma.
- */
-static int qemu_rdma_broken_ipv6_kernel(struct ibv_context *verbs, Error **errp)
-{
- /* This bug only exists in linux, to our knowledge. */
-#ifdef CONFIG_LINUX
- struct ibv_port_attr port_attr;
-
- /*
- * Verbs are only NULL if management has bound to '[::]'.
- *
- * Let's iterate through all the devices and see if there any pure IB
- * devices (non-ethernet).
- *
- * If not, then we can safely proceed with the migration.
- * Otherwise, there are no guarantees until the bug is fixed in linux.
- */
- if (!verbs) {
- int num_devices;
- struct ibv_device **dev_list = ibv_get_device_list(&num_devices);
- bool roce_found = false;
- bool ib_found = false;
-
- for (int x = 0; x < num_devices; x++) {
- verbs = ibv_open_device(dev_list[x]);
- /*
- * ibv_open_device() is not documented to set errno. If
- * it does, it's somebody else's doc bug. If it doesn't,
- * the use of errno below is wrong.
- * TODO Find out whether ibv_open_device() sets errno.
- */
- if (!verbs) {
- if (errno == EPERM) {
- continue;
- } else {
- error_setg_errno(errp, errno,
- "could not open RDMA device context");
- return -1;
- }
- }
-
- if (ibv_query_port(verbs, 1, &port_attr)) {
- ibv_close_device(verbs);
- error_setg(errp,
- "RDMA ERROR: Could not query initial IB port");
- return -1;
- }
-
- if (port_attr.link_layer == IBV_LINK_LAYER_INFINIBAND) {
- ib_found = true;
- } else if (port_attr.link_layer == IBV_LINK_LAYER_ETHERNET) {
- roce_found = true;
- }
-
- ibv_close_device(verbs);
-
- }
-
- if (roce_found) {
- if (ib_found) {
- warn_report("migrations may fail:"
- " IPv6 over RoCE / iWARP in linux"
- " is broken. But since you appear to have a"
- " mixed RoCE / IB environment, be sure to only"
- " migrate over the IB fabric until the kernel "
- " fixes the bug.");
- } else {
- error_setg(errp, "RDMA ERROR: "
- "You only have RoCE / iWARP devices in your systems"
- " and your management software has specified '[::]'"
- ", but IPv6 over RoCE / iWARP is not supported in Linux.");
- return -1;
- }
- }
-
- return 0;
- }
-
- /*
- * If we have a verbs context, that means that some other than '[::]' was
- * used by the management software for binding. In which case we can
- * actually warn the user about a potentially broken kernel.
- */
-
- /* IB ports start with 1, not 0 */
- if (ibv_query_port(verbs, 1, &port_attr)) {
- error_setg(errp, "RDMA ERROR: Could not query initial IB port");
- return -1;
- }
-
- if (port_attr.link_layer == IBV_LINK_LAYER_ETHERNET) {
- error_setg(errp, "RDMA ERROR: "
- "Linux kernel's RoCE / iWARP does not support IPv6 "
- "(but patches on linux-rdma in progress)");
- return -1;
- }
-
-#endif
-
- return 0;
-}
-
-/*
* Figure out which RDMA device corresponds to the requested IP hostname
* Also create the initial connection manager identifiers for opening
* the connection.
*/
static int qemu_rdma_resolve_host(RDMAContext *rdma, Error **errp)
{
- Error *err = NULL;
int ret;
struct rdma_addrinfo *res;
char port_str[16];
@@ -953,9 +809,8 @@ static int qemu_rdma_resolve_host(RDMAContext *rdma, Error **errp)
goto err_resolve_get_addr;
}
- /* Try all addresses, saving the first error in @err */
+ /* Try all addresses, exit loop on first success of resolving address */
for (struct rdma_addrinfo *e = res; e != NULL; e = e->ai_next) {
- Error **local_errp = err ? NULL : &err;
inet_ntop(e->ai_family,
&((struct sockaddr_in *) e->ai_dst_addr)->sin_addr, ip, sizeof ip);
@@ -964,25 +819,12 @@ static int qemu_rdma_resolve_host(RDMAContext *rdma, Error **errp)
ret = rdma_resolve_addr(rdma->cm_id, NULL, e->ai_dst_addr,
RDMA_RESOLVE_TIMEOUT_MS);
if (ret >= 0) {
- if (e->ai_family == AF_INET6) {
- ret = qemu_rdma_broken_ipv6_kernel(rdma->cm_id->verbs,
- local_errp);
- if (ret < 0) {
- continue;
- }
- }
- error_free(err);
goto route;
}
}
rdma_freeaddrinfo(res);
- if (err) {
- error_propagate(errp, err);
- } else {
- error_setg(errp, "RDMA ERROR: could not resolve address %s",
- rdma->host);
- }
+ error_setg(errp, "RDMA ERROR: could not resolve address %s", rdma->host);
goto err_resolve_get_addr;
route:
@@ -2611,7 +2453,6 @@ err_rdma_source_connect:
static int qemu_rdma_dest_init(RDMAContext *rdma, Error **errp)
{
- Error *err = NULL;
int ret;
struct rdma_cm_id *listen_id;
char ip[40] = "unknown";
@@ -2661,9 +2502,8 @@ static int qemu_rdma_dest_init(RDMAContext *rdma, Error **errp)
goto err_dest_init_bind_addr;
}
- /* Try all addresses, saving the first error in @err */
+ /* Try all addresses */
for (e = res; e != NULL; e = e->ai_next) {
- Error **local_errp = err ? NULL : &err;
inet_ntop(e->ai_family,
&((struct sockaddr_in *) e->ai_dst_addr)->sin_addr, ip, sizeof ip);
@@ -2672,24 +2512,12 @@ static int qemu_rdma_dest_init(RDMAContext *rdma, Error **errp)
if (ret < 0) {
continue;
}
- if (e->ai_family == AF_INET6) {
- ret = qemu_rdma_broken_ipv6_kernel(listen_id->verbs,
- local_errp);
- if (ret < 0) {
- continue;
- }
- }
- error_free(err);
break;
}
rdma_freeaddrinfo(res);
if (!e) {
- if (err) {
- error_propagate(errp, err);
- } else {
- error_setg(errp, "RDMA ERROR: Error: could not rdma_bind_addr!");
- }
+ error_setg(errp, "RDMA ERROR: Error: could not rdma_bind_addr!");
goto err_dest_init_bind_addr;
}
@@ -3284,14 +3112,11 @@ err:
int rdma_control_save_page(QEMUFile *f, ram_addr_t block_offset,
ram_addr_t offset, size_t size)
{
- if (!migrate_rdma() || migration_in_postcopy()) {
- return RAM_SAVE_CONTROL_NOT_SUPP;
- }
+ assert(migrate_rdma());
int ret = qemu_rdma_save_page(f, block_offset, offset, size);
- if (ret != RAM_SAVE_CONTROL_DELAYED &&
- ret != RAM_SAVE_CONTROL_NOT_SUPP) {
+ if (ret != RAM_SAVE_CONTROL_DELAYED) {
if (ret < 0) {
qemu_file_set_error(f, ret);
}
@@ -3829,7 +3654,7 @@ int rdma_block_notification_handle(QEMUFile *f, const char *name)
int rdma_registration_start(QEMUFile *f, uint64_t flags)
{
- if (!migrate_rdma() || migration_in_postcopy()) {
+ if (!migrate_rdma()) {
return 0;
}
@@ -3861,7 +3686,7 @@ int rdma_registration_stop(QEMUFile *f, uint64_t flags)
RDMAControlHeader head = { .len = 0, .repeat = 1 };
int ret;
- if (!migrate_rdma() || migration_in_postcopy()) {
+ if (!migrate_rdma()) {
return 0;
}
diff --git a/migration/rdma.h b/migration/rdma.h
index 4d3386b..f74f16a 100644
--- a/migration/rdma.h
+++ b/migration/rdma.h
@@ -33,7 +33,6 @@ void rdma_start_incoming_migration(InetSocketAddress *host_port, Error **errp);
#define RAM_CONTROL_ROUND 1
#define RAM_CONTROL_FINISH 3
-#define RAM_SAVE_CONTROL_NOT_SUPP -1000
#define RAM_SAVE_CONTROL_DELAYED -2000
#ifdef CONFIG_RDMA
@@ -56,7 +55,7 @@ static inline
int rdma_control_save_page(QEMUFile *f, ram_addr_t block_offset,
ram_addr_t offset, size_t size)
{
- return RAM_SAVE_CONTROL_NOT_SUPP;
+ g_assert_not_reached();
}
#endif
#endif
diff --git a/migration/savevm.c b/migration/savevm.c
index 0c12e37..006514c 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1523,6 +1523,39 @@ void qemu_savevm_state_complete_postcopy(QEMUFile *f)
qemu_fflush(f);
}
+/*
+ * Run the save_postcopy_prepare() hook of every registered handler that
+ * provides one and is active (no is_active() callback, or is_active()
+ * returning true).  The output of each hook is wrapped in a
+ * QEMU_VM_SECTION_PART section on @f.
+ *
+ * @f: QEMUFile the sections are written to
+ * @errp: set by the failing hook; may be NULL
+ *
+ * Returns: true if every hook succeeded, false as soon as one fails (the
+ * remaining hooks are not run).
+ */
+bool qemu_savevm_state_postcopy_prepare(QEMUFile *f, Error **errp)
+{
+    SaveStateEntry *se;
+    bool ret;
+
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
+        if (!se->ops || !se->ops->save_postcopy_prepare) {
+            continue;
+        }
+
+        if (se->ops->is_active && !se->ops->is_active(se->opaque)) {
+            continue;
+        }
+
+        trace_savevm_section_start(se->idstr, se->section_id);
+
+        save_section_header(f, se, QEMU_VM_SECTION_PART);
+        ret = se->ops->save_postcopy_prepare(f, se->opaque, errp);
+        save_section_footer(f, se);
+
+        trace_savevm_section_end(se->idstr, se->section_id, ret);
+
+        if (!ret) {
+            /*
+             * A failing hook must have set the error.  @errp itself may
+             * legitimately be NULL (caller ignores the error), so only
+             * dereference it when it is not.
+             */
+            assert(!errp || *errp);
+            return false;
+        }
+    }
+
+    return true;
+}
+
int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy)
{
int64_t start_ts_each, end_ts_each;
diff --git a/migration/savevm.h b/migration/savevm.h
index 138c39a..2d5e9c7 100644
--- a/migration/savevm.h
+++ b/migration/savevm.h
@@ -45,6 +45,7 @@ void qemu_savevm_state_pending_exact(uint64_t *must_precopy,
void qemu_savevm_state_pending_estimate(uint64_t *must_precopy,
uint64_t *can_postcopy);
int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy);
+bool qemu_savevm_state_postcopy_prepare(QEMUFile *f, Error **errp);
void qemu_savevm_send_ping(QEMUFile *f, uint32_t value);
void qemu_savevm_send_open_return_path(QEMUFile *f);
int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len);
diff --git a/scripts/rdma-migration-helper.sh b/scripts/rdma-migration-helper.sh
new file mode 100755
index 0000000..a39f2fb
--- /dev/null
+++ b/scripts/rdma-migration-helper.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+#
+# Helper to detect, set up or clean up an rdma/rxe link.
+#
+# Usage: rdma-migration-helper.sh [setup | detect | clean]
+#
+# With no argument, "detect" is assumed.  "setup" and "clean" require
+# root.  Any other argument prints the usage and exits with status 1.
+
+# Copied from blktests
+# Print the first IPv4 address configured on interface $1, without a
+# trailing newline.
+get_ipv4_addr()
+{
+    ip -4 -o addr show dev "$1" |
+        sed -n 's/.*[[:blank:]]inet[[:blank:]]*\([^[:blank:]/]*\).*/\1/p' |
+        head -1 | tr -d '\n'
+}
+
+# existing rdma interfaces
+# Print the netdev name of every link reported by "rdma link show".
+rdma_interfaces()
+{
+    rdma link show | sed -nE 's/^link .* netdev ([^ ]+).*$/\1 /p'
+}
+
+# existing valid ipv4 interfaces
+# Print every interface that has an "inet" address, except lo.
+ipv4_interfaces()
+{
+    ip -o addr show | awk '/inet / {print $2}' | grep -v -w lo
+}
+
+# Print the IPv4 address of the first rdma interface that also has an
+# IPv4 address.  Returns 1 if there is no such interface.
+rdma_rxe_detect()
+{
+    for r in $(rdma_interfaces)
+    do
+        # Quote the name so it is passed to grep/ip as a single word
+        ipv4_interfaces | grep -qw "$r" && get_ipv4_addr "$r" && return
+    done
+
+    return 1
+}
+
+# Add an rxe link on top of the first IPv4 interface that does not have
+# an rdma link yet.  Returns 1 if no link could be added.
+rdma_rxe_setup()
+{
+    for i in $(ipv4_interfaces)
+    do
+        rdma_interfaces | grep -qw "$i" && continue
+        rdma link add "${i}_rxe" type rxe netdev "$i" && {
+            echo "Setup new rdma/rxe ${i}_rxe for $i with $(get_ipv4_addr "$i")"
+            return
+        }
+    done
+
+    echo "Failed to setup any new rdma/rxe link" >&2
+    return 1
+}
+
+# Unload the rdma_rxe module.
+rdma_rxe_clean()
+{
+    modprobe -r rdma_rxe
+}
+
+operation=${1:-detect}
+
+command -v rdma >/dev/null || {
+    echo "Command 'rdma' is not available, please install it first." >&2
+    exit 1
+}
+
+if [ "$operation" == "setup" ] || [ "$operation" == "clean" ]; then
+    [ "$UID" == 0 ] || {
+        echo "Root privilege is required to setup/clean a rdma/rxe link" >&2
+        exit 1
+    }
+    rdma_rxe_"$operation"
+elif [ "$operation" == "detect" ]; then
+    rdma_rxe_detect
+else
+    # Unknown operation: report it as an error instead of exiting with 0
+    echo "Usage: $0 [setup | detect | clean]" >&2
+    exit 1
+fi
diff --git a/scripts/vmstate-static-checker.py b/scripts/vmstate-static-checker.py
index 9c0e6b8..25aca83 100755
--- a/scripts/vmstate-static-checker.py
+++ b/scripts/vmstate-static-checker.py
@@ -42,6 +42,7 @@ def check_fields_match(name, s_field, d_field):
# Some fields changed names between qemu versions. This list
# is used to allow such changes in each section / description.
changed_names = {
+ 'acpi-ghes': ['ghes_addr_le', 'hw_error_le'],
'apic': ['timer', 'timer_expiry'],
'e1000': ['dev', 'parent_obj'],
'ehci': ['dev', 'pcidev'],
diff --git a/tests/qtest/migration/compression-tests.c b/tests/qtest/migration/compression-tests.c
index 8b58401..41e79f0 100644
--- a/tests/qtest/migration/compression-tests.c
+++ b/tests/qtest/migration/compression-tests.c
@@ -35,6 +35,9 @@ static void test_multifd_tcp_zstd(void)
{
MigrateCommon args = {
.listen_uri = "defer",
+ .start = {
+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true,
+ },
.start_hook = migrate_hook_start_precopy_tcp_multifd_zstd,
};
test_precopy_common(&args);
@@ -56,6 +59,9 @@ static void test_multifd_tcp_qatzip(void)
{
MigrateCommon args = {
.listen_uri = "defer",
+ .start = {
+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true,
+ },
.start_hook = migrate_hook_start_precopy_tcp_multifd_qatzip,
};
test_precopy_common(&args);
@@ -74,6 +80,9 @@ static void test_multifd_tcp_qpl(void)
{
MigrateCommon args = {
.listen_uri = "defer",
+ .start = {
+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true,
+ },
.start_hook = migrate_hook_start_precopy_tcp_multifd_qpl,
};
test_precopy_common(&args);
@@ -92,6 +101,9 @@ static void test_multifd_tcp_uadk(void)
{
MigrateCommon args = {
.listen_uri = "defer",
+ .start = {
+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true,
+ },
.start_hook = migrate_hook_start_precopy_tcp_multifd_uadk,
};
test_precopy_common(&args);
@@ -103,10 +115,6 @@ migrate_hook_start_xbzrle(QTestState *from,
QTestState *to)
{
migrate_set_parameter_int(from, "xbzrle-cache-size", 33554432);
-
- migrate_set_capability(from, "xbzrle", true);
- migrate_set_capability(to, "xbzrle", true);
-
return NULL;
}
@@ -118,6 +126,9 @@ static void test_precopy_unix_xbzrle(void)
.listen_uri = uri,
.start_hook = migrate_hook_start_xbzrle,
.iterations = 2,
+ .start = {
+ .caps[MIGRATION_CAPABILITY_XBZRLE] = true,
+ },
/*
* XBZRLE needs pages to be modified when doing the 2nd+ round
* iteration to have real data pushed to the stream.
@@ -146,6 +157,9 @@ static void test_multifd_tcp_zlib(void)
{
MigrateCommon args = {
.listen_uri = "defer",
+ .start = {
+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true,
+ },
.start_hook = migrate_hook_start_precopy_tcp_multifd_zlib,
};
test_precopy_common(&args);
diff --git a/tests/qtest/migration/cpr-tests.c b/tests/qtest/migration/cpr-tests.c
index 4758841..5536e14 100644
--- a/tests/qtest/migration/cpr-tests.c
+++ b/tests/qtest/migration/cpr-tests.c
@@ -24,9 +24,6 @@ static void *migrate_hook_start_mode_reboot(QTestState *from, QTestState *to)
migrate_set_parameter_str(from, "mode", "cpr-reboot");
migrate_set_parameter_str(to, "mode", "cpr-reboot");
- migrate_set_capability(from, "x-ignore-shared", true);
- migrate_set_capability(to, "x-ignore-shared", true);
-
return NULL;
}
@@ -39,6 +36,9 @@ static void test_mode_reboot(void)
.connect_uri = uri,
.listen_uri = "defer",
.start_hook = migrate_hook_start_mode_reboot,
+ .start = {
+ .caps[MIGRATION_CAPABILITY_X_IGNORE_SHARED] = true,
+ },
};
test_file_common(&args, true);
diff --git a/tests/qtest/migration/file-tests.c b/tests/qtest/migration/file-tests.c
index f260e28..4d78ce0 100644
--- a/tests/qtest/migration/file-tests.c
+++ b/tests/qtest/migration/file-tests.c
@@ -107,15 +107,6 @@ static void test_precopy_file_offset_bad(void)
test_file_common(&args, false);
}
-static void *migrate_hook_start_mapped_ram(QTestState *from,
- QTestState *to)
-{
- migrate_set_capability(from, "mapped-ram", true);
- migrate_set_capability(to, "mapped-ram", true);
-
- return NULL;
-}
-
static void test_precopy_file_mapped_ram_live(void)
{
g_autofree char *uri = g_strdup_printf("file:%s/%s", tmpfs,
@@ -123,7 +114,9 @@ static void test_precopy_file_mapped_ram_live(void)
MigrateCommon args = {
.connect_uri = uri,
.listen_uri = "defer",
- .start_hook = migrate_hook_start_mapped_ram,
+ .start = {
+ .caps[MIGRATION_CAPABILITY_MAPPED_RAM] = true,
+ },
};
test_file_common(&args, false);
@@ -136,26 +129,14 @@ static void test_precopy_file_mapped_ram(void)
MigrateCommon args = {
.connect_uri = uri,
.listen_uri = "defer",
- .start_hook = migrate_hook_start_mapped_ram,
+ .start = {
+ .caps[MIGRATION_CAPABILITY_MAPPED_RAM] = true,
+ },
};
test_file_common(&args, true);
}
-static void *migrate_hook_start_multifd_mapped_ram(QTestState *from,
- QTestState *to)
-{
- migrate_hook_start_mapped_ram(from, to);
-
- migrate_set_parameter_int(from, "multifd-channels", 4);
- migrate_set_parameter_int(to, "multifd-channels", 4);
-
- migrate_set_capability(from, "multifd", true);
- migrate_set_capability(to, "multifd", true);
-
- return NULL;
-}
-
static void test_multifd_file_mapped_ram_live(void)
{
g_autofree char *uri = g_strdup_printf("file:%s/%s", tmpfs,
@@ -163,7 +144,10 @@ static void test_multifd_file_mapped_ram_live(void)
MigrateCommon args = {
.connect_uri = uri,
.listen_uri = "defer",
- .start_hook = migrate_hook_start_multifd_mapped_ram,
+ .start = {
+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true,
+ .caps[MIGRATION_CAPABILITY_MAPPED_RAM] = true,
+ },
};
test_file_common(&args, false);
@@ -176,7 +160,10 @@ static void test_multifd_file_mapped_ram(void)
MigrateCommon args = {
.connect_uri = uri,
.listen_uri = "defer",
- .start_hook = migrate_hook_start_multifd_mapped_ram,
+ .start = {
+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true,
+ .caps[MIGRATION_CAPABILITY_MAPPED_RAM] = true,
+ },
};
test_file_common(&args, true);
@@ -185,8 +172,6 @@ static void test_multifd_file_mapped_ram(void)
static void *migrate_hook_start_multifd_mapped_ram_dio(QTestState *from,
QTestState *to)
{
- migrate_hook_start_multifd_mapped_ram(from, to);
-
migrate_set_parameter_bool(from, "direct-io", true);
migrate_set_parameter_bool(to, "direct-io", true);
@@ -201,6 +186,10 @@ static void test_multifd_file_mapped_ram_dio(void)
.connect_uri = uri,
.listen_uri = "defer",
.start_hook = migrate_hook_start_multifd_mapped_ram_dio,
+ .start = {
+ .caps[MIGRATION_CAPABILITY_MAPPED_RAM] = true,
+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true,
+ },
};
if (!probe_o_direct_support(tmpfs)) {
@@ -246,7 +235,6 @@ static void *migrate_hook_start_multifd_mapped_ram_fdset_dio(QTestState *from,
fdset_add_fds(from, file, O_WRONLY, 2, true);
fdset_add_fds(to, file, O_RDONLY, 2, true);
- migrate_hook_start_multifd_mapped_ram(from, to);
migrate_set_parameter_bool(from, "direct-io", true);
migrate_set_parameter_bool(to, "direct-io", true);
@@ -261,8 +249,6 @@ static void *migrate_hook_start_multifd_mapped_ram_fdset(QTestState *from,
fdset_add_fds(from, file, O_WRONLY, 2, false);
fdset_add_fds(to, file, O_RDONLY, 2, false);
- migrate_hook_start_multifd_mapped_ram(from, to);
-
return NULL;
}
@@ -275,6 +261,10 @@ static void test_multifd_file_mapped_ram_fdset(void)
.listen_uri = "defer",
.start_hook = migrate_hook_start_multifd_mapped_ram_fdset,
.end_hook = migrate_hook_end_multifd_mapped_ram_fdset,
+ .start = {
+ .caps[MIGRATION_CAPABILITY_MAPPED_RAM] = true,
+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true,
+ },
};
test_file_common(&args, true);
@@ -289,6 +279,10 @@ static void test_multifd_file_mapped_ram_fdset_dio(void)
.listen_uri = "defer",
.start_hook = migrate_hook_start_multifd_mapped_ram_fdset_dio,
.end_hook = migrate_hook_end_multifd_mapped_ram_fdset,
+ .start = {
+ .caps[MIGRATION_CAPABILITY_MAPPED_RAM] = true,
+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true,
+ },
};
if (!probe_o_direct_support(tmpfs)) {
diff --git a/tests/qtest/migration/framework.c b/tests/qtest/migration/framework.c
index 10e1d04..e48b80a 100644
--- a/tests/qtest/migration/framework.c
+++ b/tests/qtest/migration/framework.c
@@ -30,6 +30,7 @@
#define QEMU_VM_FILE_MAGIC 0x5145564d
#define QEMU_ENV_SRC "QTEST_QEMU_BINARY_SRC"
#define QEMU_ENV_DST "QTEST_QEMU_BINARY_DST"
+#define MULTIFD_TEST_CHANNELS 4
unsigned start_address;
unsigned end_address;
@@ -207,6 +208,51 @@ static QList *migrate_start_get_qmp_capabilities(const MigrateStart *args)
return capabilities;
}
+/*
+ * Enable every capability flagged in @args->caps on whichever of @from and
+ * @to is non-NULL, then turn on "events" and, when multifd was requested,
+ * apply the default multifd channel count.
+ */
+static void migrate_start_set_capabilities(QTestState *from, QTestState *to,
+                                           MigrateStart *args)
+{
+    /*
+     * Both MigrationCapability_lookup and the MIGRATION_CAPABILITY_
+     * constants are provided by qapi-types-migration.h.
+     */
+    for (uint8_t cap = 0; cap < MIGRATION_CAPABILITY__MAX; cap++) {
+        if (args->caps[cap]) {
+            const char *cap_name = MigrationCapability_lookup.array[cap];
+
+            if (from) {
+                migrate_set_capability(from, cap_name, true);
+            }
+            if (to) {
+                migrate_set_capability(to, cap_name, true);
+            }
+        }
+    }
+
+    /*
+     * Migration events are always switched on: libvirt always uses them,
+     * so the tests mimic that setup as closely as possible.
+     */
+    migrate_set_capability(from, "events", true);
+    if (!args->defer_target_connect) {
+        migrate_set_capability(to, "events", true);
+    }
+
+    /*
+     * The default channel count suits most tests; a test that needs a
+     * different value can call migrate_set_parameter() itself.
+     */
+    if (args->caps[MIGRATION_CAPABILITY_MULTIFD]) {
+        migrate_set_parameter_int(from, "multifd-channels",
+                                  MULTIFD_TEST_CHANNELS);
+        migrate_set_parameter_int(to, "multifd-channels",
+                                  MULTIFD_TEST_CHANNELS);
+    }
+}
+
+
int migrate_start(QTestState **from, QTestState **to, const char *uri,
MigrateStart *args)
{
@@ -379,14 +425,7 @@ int migrate_start(QTestState **from, QTestState **to, const char *uri,
unlink(shmem_path);
}
- /*
- * Always enable migration events. Libvirt always uses it, let's try
- * to mimic as closer as that.
- */
- migrate_set_capability(*from, "events", true);
- if (!args->defer_target_connect) {
- migrate_set_capability(*to, "events", true);
- }
+ migrate_start_set_capabilities(*from, *to, args);
return 0;
}
@@ -432,6 +471,10 @@ static int migrate_postcopy_prepare(QTestState **from_ptr,
{
QTestState *from, *to;
+ /* set postcopy capabilities */
+ args->start.caps[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME] = true;
+ args->start.caps[MIGRATION_CAPABILITY_POSTCOPY_RAM] = true;
+
if (migrate_start(&from, &to, "defer", &args->start)) {
return -1;
}
@@ -440,17 +483,7 @@ static int migrate_postcopy_prepare(QTestState **from_ptr,
args->postcopy_data = args->start_hook(from, to);
}
- migrate_set_capability(from, "postcopy-ram", true);
- migrate_set_capability(to, "postcopy-ram", true);
- migrate_set_capability(to, "postcopy-blocktime", true);
-
- if (args->postcopy_preempt) {
- migrate_set_capability(from, "postcopy-preempt", true);
- migrate_set_capability(to, "postcopy-preempt", true);
- }
-
migrate_ensure_non_converge(from);
-
migrate_prepare_for_dirty_mem(from);
qtest_qmp_assert_success(to, "{ 'execute': 'migrate-incoming',"
" 'arguments': { "
@@ -948,15 +981,9 @@ void *migrate_hook_start_precopy_tcp_multifd_common(QTestState *from,
QTestState *to,
const char *method)
{
- migrate_set_parameter_int(from, "multifd-channels", 16);
- migrate_set_parameter_int(to, "multifd-channels", 16);
-
migrate_set_parameter_str(from, "multifd-compression", method);
migrate_set_parameter_str(to, "multifd-compression", method);
- migrate_set_capability(from, "multifd", true);
- migrate_set_capability(to, "multifd", true);
-
/* Start incoming migration from the 1st socket */
migrate_incoming_qmp(to, "tcp:127.0.0.1:0", NULL, "{}");
diff --git a/tests/qtest/migration/framework.h b/tests/qtest/migration/framework.h
index e4a1187..01e425e 100644
--- a/tests/qtest/migration/framework.h
+++ b/tests/qtest/migration/framework.h
@@ -12,6 +12,7 @@
#define TEST_FRAMEWORK_H
#include "libqtest.h"
+#include <qapi/qapi-types-migration.h>
#define FILE_TEST_FILENAME "migfile"
#define FILE_TEST_OFFSET 0x1000
@@ -120,6 +121,13 @@ typedef struct {
/* Do not connect to target monitor and qtest sockets in qtest_init */
bool defer_target_connect;
+
+ /*
+ * Migration capabilities to be set in both source and
+ * destination. For unilateral capabilities, use
+ * migrate_set_capability() directly.
+ */
+ bool caps[MIGRATION_CAPABILITY__MAX];
} MigrateStart;
typedef enum PostcopyRecoveryFailStage {
@@ -207,7 +215,6 @@ typedef struct {
/* Postcopy specific fields */
void *postcopy_data;
- bool postcopy_preempt;
PostcopyRecoveryFailStage postcopy_recovery_fail_stage;
} MigrateCommon;
diff --git a/tests/qtest/migration/misc-tests.c b/tests/qtest/migration/misc-tests.c
index 2e612d9..5499525 100644
--- a/tests/qtest/migration/misc-tests.c
+++ b/tests/qtest/migration/misc-tests.c
@@ -98,6 +98,7 @@ static void test_ignore_shared(void)
QTestState *from, *to;
MigrateStart args = {
.use_shmem = true,
+ .caps[MIGRATION_CAPABILITY_X_IGNORE_SHARED] = true,
};
if (migrate_start(&from, &to, uri, &args)) {
@@ -107,9 +108,6 @@ static void test_ignore_shared(void)
migrate_ensure_non_converge(from);
migrate_prepare_for_dirty_mem(from);
- migrate_set_capability(from, "x-ignore-shared", true);
- migrate_set_capability(to, "x-ignore-shared", true);
-
/* Wait for the first serial output from the source */
wait_for_serial("src_serial");
diff --git a/tests/qtest/migration/postcopy-tests.c b/tests/qtest/migration/postcopy-tests.c
index 982457b..483e3ff 100644
--- a/tests/qtest/migration/postcopy-tests.c
+++ b/tests/qtest/migration/postcopy-tests.c
@@ -39,7 +39,9 @@ static void test_postcopy_suspend(void)
static void test_postcopy_preempt(void)
{
MigrateCommon args = {
- .postcopy_preempt = true,
+ .start = {
+ .caps[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT] = true,
+ },
};
test_postcopy_common(&args);
@@ -73,7 +75,9 @@ static void test_postcopy_recovery_fail_reconnect(void)
static void test_postcopy_preempt_recovery(void)
{
MigrateCommon args = {
- .postcopy_preempt = true,
+ .start = {
+ .caps[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT] = true,
+ },
};
test_postcopy_recovery_common(&args);
diff --git a/tests/qtest/migration/precopy-tests.c b/tests/qtest/migration/precopy-tests.c
index ba273d1..87b0a7e 100644
--- a/tests/qtest/migration/precopy-tests.c
+++ b/tests/qtest/migration/precopy-tests.c
@@ -99,32 +99,85 @@ static void test_precopy_unix_dirty_ring(void)
test_precopy_common(&args);
}
-static void test_precopy_tcp_plain(void)
+#ifdef CONFIG_RDMA
+
+#define RDMA_MIGRATION_HELPER "scripts/rdma-migration-helper.sh"
+/*
+ * Run "RDMA_MIGRATION_HELPER detect" and collect its standard output in
+ * @buffer.  The helper's stderr is discarded unless QTEST_LOG is set.
+ *
+ * @buffer: destination with room for at least 128 bytes.  Nothing is
+ *          written to it when the helper prints nothing, so callers
+ *          should pass it zero-initialised.
+ *
+ * Returns: the helper's exit status when it exited normally, -1 when it
+ * could not be started, could not be reaped, or did not exit normally.
+ */
+static int new_rdma_link(char *buffer)
{
+    char cmd[256];
+    bool verbose = g_getenv("QTEST_LOG");
+
+    snprintf(cmd, sizeof(cmd), "%s detect %s", RDMA_MIGRATION_HELPER,
+             verbose ? "" : "2>/dev/null");
+
+    FILE *pipe = popen(cmd, "r");
+    if (pipe == NULL) {
+        perror("Failed to run script");
+        return -1;
+    }
+
+    /*
+     * Append each chunk right after the previous one.  The offset must
+     * advance by the length of the chunk just read (not of the whole
+     * buffer), and the loop must stop once only the terminator fits:
+     * fgets() with a size of 1 reads nothing but still succeeds.
+     */
+    int idx = 0;
+    while (idx < 127 && fgets(buffer + idx, 128 - idx, pipe) != NULL) {
+        idx += strlen(buffer + idx);
+    }
+
+    int status = pclose(pipe);
+    if (status == -1) {
+        perror("Error reported by pclose()");
+        return -1;
+    } else if (WIFEXITED(status)) {
+        return WEXITSTATUS(status);
+    }
+
+    return -1;
+}
+
+/*
+ * Plain precopy migration over an "rdma:" URI.  The host part of the URI
+ * is whatever new_rdma_link() stored in the buffer; the test is skipped
+ * when new_rdma_link() returns non-zero.
+ */
+static void test_precopy_rdma_plain(void)
+{
+ char buffer[128] = {};
+
+ /* Non-zero: the helper script failed or could not be run. */
+ if (new_rdma_link(buffer)) {
+ g_test_skip("No rdma link available\n"
+ "# To enable the test:\n"
+ "# Run \'" RDMA_MIGRATION_HELPER " setup\' with root to "
+ "setup a new rdma/rxe link and rerun the test\n"
+ "# Optional: run 'scripts/rdma-migration-helper.sh clean' "
+ "to revert the 'setup'");
+ return;
+ }
+
+ /*
+ * TODO: query a free port instead of hard-coding one.
+ * 29200 = ('R' + 'D' + 'M' + 'A') * 100
+ */
+ g_autofree char *uri = g_strdup_printf("rdma:%s:29200", buffer);
+
MigrateCommon args = {
- .listen_uri = "tcp:127.0.0.1:0",
+ .listen_uri = uri,
+ .connect_uri = uri,
};
test_precopy_common(&args);
}
+#endif
-static void *migrate_hook_start_switchover_ack(QTestState *from, QTestState *to)
+/* Plain precopy migration listening on "tcp:127.0.0.1:0". */
+static void test_precopy_tcp_plain(void)
{
+ MigrateCommon args = {
+ .listen_uri = "tcp:127.0.0.1:0",
+ };
- migrate_set_capability(from, "return-path", true);
- migrate_set_capability(to, "return-path", true);
-
- migrate_set_capability(from, "switchover-ack", true);
- migrate_set_capability(to, "switchover-ack", true);
-
- return NULL;
+ test_precopy_common(&args);
}
static void test_precopy_tcp_switchover_ack(void)
{
MigrateCommon args = {
.listen_uri = "tcp:127.0.0.1:0",
- .start_hook = migrate_hook_start_switchover_ack,
+ .start = {
+ .caps[MIGRATION_CAPABILITY_RETURN_PATH] = true,
+ .caps[MIGRATION_CAPABILITY_SWITCHOVER_ACK] = true,
+ },
/*
* Source VM must be running in order to consider the switchover ACK
* when deciding to do switchover or not.
@@ -393,6 +446,9 @@ static void test_multifd_tcp_uri_none(void)
MigrateCommon args = {
.listen_uri = "defer",
.start_hook = migrate_hook_start_precopy_tcp_multifd,
+ .start = {
+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true,
+ },
/*
* Multifd is more complicated than most of the features, it
* directly takes guest page buffers when sending, make sure
@@ -408,6 +464,9 @@ static void test_multifd_tcp_zero_page_legacy(void)
MigrateCommon args = {
.listen_uri = "defer",
.start_hook = migrate_hook_start_precopy_tcp_multifd_zero_page_legacy,
+ .start = {
+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true,
+ },
/*
* Multifd is more complicated than most of the features, it
* directly takes guest page buffers when sending, make sure
@@ -423,6 +482,9 @@ static void test_multifd_tcp_no_zero_page(void)
MigrateCommon args = {
.listen_uri = "defer",
.start_hook = migrate_hook_start_precopy_tcp_multifd_no_zero_page,
+ .start = {
+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true,
+ },
/*
* Multifd is more complicated than most of the features, it
* directly takes guest page buffers when sending, make sure
@@ -439,6 +501,9 @@ static void test_multifd_tcp_channels_none(void)
.listen_uri = "defer",
.start_hook = migrate_hook_start_precopy_tcp_multifd,
.live = true,
+ .start = {
+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true,
+ },
.connect_channels = ("[ { 'channel-type': 'main',"
" 'addr': { 'transport': 'socket',"
" 'type': 'inet',"
@@ -1124,6 +1189,10 @@ static void migration_test_add_precopy_smoke(MigrationTestEnv *env)
test_multifd_tcp_uri_none);
migration_test_add("/migration/multifd/tcp/plain/cancel",
test_multifd_tcp_cancel);
+#ifdef CONFIG_RDMA
+ migration_test_add("/migration/precopy/rdma/plain",
+ test_precopy_rdma_plain);
+#endif
}
void migration_test_add_precopy(MigrationTestEnv *env)
diff --git a/tests/qtest/migration/tls-tests.c b/tests/qtest/migration/tls-tests.c
index 2cb4a44..72f44de 100644
--- a/tests/qtest/migration/tls-tests.c
+++ b/tests/qtest/migration/tls-tests.c
@@ -375,9 +375,11 @@ static void test_postcopy_tls_psk(void)
static void test_postcopy_preempt_tls_psk(void)
{
MigrateCommon args = {
- .postcopy_preempt = true,
.start_hook = migrate_hook_start_tls_psk_match,
.end_hook = migrate_hook_end_tls_psk,
+ .start = {
+ .caps[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT] = true,
+ },
};
test_postcopy_common(&args);
@@ -397,9 +399,11 @@ static void test_postcopy_recovery_tls_psk(void)
static void test_postcopy_preempt_all(void)
{
MigrateCommon args = {
- .postcopy_preempt = true,
.start_hook = migrate_hook_start_tls_psk_match,
.end_hook = migrate_hook_end_tls_psk,
+ .start = {
+ .caps[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT] = true,
+ },
};
test_postcopy_recovery_common(&args);
@@ -631,6 +635,9 @@ static void test_multifd_tcp_tls_psk_match(void)
.listen_uri = "defer",
.start_hook = migrate_hook_start_multifd_tcp_tls_psk_match,
.end_hook = migrate_hook_end_tls_psk,
+ .start = {
+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true,
+ },
};
test_precopy_common(&args);
}
@@ -640,6 +647,7 @@ static void test_multifd_tcp_tls_psk_mismatch(void)
MigrateCommon args = {
.start = {
.hide_stderr = true,
+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true,
},
.listen_uri = "defer",
.start_hook = migrate_hook_start_multifd_tcp_tls_psk_mismatch,
@@ -656,6 +664,9 @@ static void test_multifd_tcp_tls_x509_default_host(void)
.listen_uri = "defer",
.start_hook = migrate_hook_start_multifd_tls_x509_default_host,
.end_hook = migrate_hook_end_tls_x509,
+ .start = {
+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true,
+ },
};
test_precopy_common(&args);
}
@@ -666,6 +677,9 @@ static void test_multifd_tcp_tls_x509_override_host(void)
.listen_uri = "defer",
.start_hook = migrate_hook_start_multifd_tls_x509_override_host,
.end_hook = migrate_hook_end_tls_x509,
+ .start = {
+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true,
+ },
};
test_precopy_common(&args);
}
@@ -688,6 +702,7 @@ static void test_multifd_tcp_tls_x509_mismatch_host(void)
MigrateCommon args = {
.start = {
.hide_stderr = true,
+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true,
},
.listen_uri = "defer",
.start_hook = migrate_hook_start_multifd_tls_x509_mismatch_host,
@@ -703,6 +718,9 @@ static void test_multifd_tcp_tls_x509_allow_anon_client(void)
.listen_uri = "defer",
.start_hook = migrate_hook_start_multifd_tls_x509_allow_anon_client,
.end_hook = migrate_hook_end_tls_x509,
+ .start = {
+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true,
+ },
};
test_precopy_common(&args);
}
@@ -712,6 +730,7 @@ static void test_multifd_tcp_tls_x509_reject_anon_client(void)
MigrateCommon args = {
.start = {
.hide_stderr = true,
+ .caps[MIGRATION_CAPABILITY_MULTIFD] = true,
},
.listen_uri = "defer",
.start_hook = migrate_hook_start_multifd_tls_x509_reject_anon_client,