From adde220a93a0192782243d25580a647004f6dd44 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 19 Feb 2015 11:40:27 +0000 Subject: Add -incoming defer -incoming defer causes qemu to wait for an incoming migration to be specified later. The monitor can be used to set migration capabilities that may affect the incoming connection process. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- migration/migration.c | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index b3adbc6..f3d49d5 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -49,6 +49,8 @@ enum { static NotifierList migration_state_notifiers = NOTIFIER_LIST_INITIALIZER(migration_state_notifiers); +static bool deferred_incoming; + /* When we add fault tolerance, we could have several migrations at once. For now we don't need to add dynamic creation of migration */ @@ -65,25 +67,40 @@ MigrationState *migrate_get_current(void) return &current_migration; } +/* + * Called on -incoming with a defer: uri. + * The migration can be started later after any parameters have been + * changed. 
+ */ +static void deferred_incoming_migration(Error **errp) +{ + if (deferred_incoming) { + error_setg(errp, "Incoming migration already deferred"); + } + deferred_incoming = true; +} + void qemu_start_incoming_migration(const char *uri, Error **errp) { const char *p; - if (strstart(uri, "tcp:", &p)) + if (!strcmp(uri, "defer")) { + deferred_incoming_migration(errp); + } else if (strstart(uri, "tcp:", &p)) { tcp_start_incoming_migration(p, errp); #ifdef CONFIG_RDMA - else if (strstart(uri, "rdma:", &p)) + } else if (strstart(uri, "rdma:", &p)) { rdma_start_incoming_migration(p, errp); #endif #if !defined(WIN32) - else if (strstart(uri, "exec:", &p)) + } else if (strstart(uri, "exec:", &p)) { exec_start_incoming_migration(p, errp); - else if (strstart(uri, "unix:", &p)) + } else if (strstart(uri, "unix:", &p)) { unix_start_incoming_migration(p, errp); - else if (strstart(uri, "fd:", &p)) + } else if (strstart(uri, "fd:", &p)) { fd_start_incoming_migration(p, errp); #endif - else { + } else { error_setg(errp, "unknown migration protocol: %s", uri); } } -- cgit v1.1 From bf1ae1f4dc348650fb7f32a3fcc278f60a7b1bf6 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 19 Feb 2015 11:40:28 +0000 Subject: Add migrate_incoming Add migrate_incoming/migrate-incoming to start an incoming migration. Once a qemu has been started with -incoming defer the migration can be started by issuing: migrate_incoming uri Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Eric Blake Signed-off-by: Juan Quintela --- hmp-commands.hx | 16 ++++++++++++++++ hmp.c | 14 ++++++++++++++ hmp.h | 1 + migration/migration.c | 19 +++++++++++++++++++ qapi-schema.json | 15 +++++++++++++++ qmp-commands.hx | 31 ++++++++++++++++++++++++++++++- 6 files changed, 95 insertions(+), 1 deletion(-) diff --git a/hmp-commands.hx b/hmp-commands.hx index d5022d8..9c1e849 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -922,6 +922,22 @@ Cancel the current VM migration. 
ETEXI { + .name = "migrate_incoming", + .args_type = "uri:s", + .params = "uri", + .help = "Continue an incoming migration from an -incoming defer", + .mhandler.cmd = hmp_migrate_incoming, + }, + +STEXI +@item migrate_incoming @var{uri} +@findex migrate_incoming +Continue an incoming migration using the @var{uri} (that has the same syntax +as the -incoming option). + +ETEXI + + { .name = "migrate_set_cache_size", .args_type = "value:o", .params = "value", diff --git a/hmp.c b/hmp.c index 71c28bc..f6cde86 100644 --- a/hmp.c +++ b/hmp.c @@ -1116,6 +1116,20 @@ void hmp_migrate_cancel(Monitor *mon, const QDict *qdict) qmp_migrate_cancel(NULL); } +void hmp_migrate_incoming(Monitor *mon, const QDict *qdict) +{ + Error *err = NULL; + const char *uri = qdict_get_str(qdict, "uri"); + + qmp_migrate_incoming(uri, &err); + + if (err) { + monitor_printf(mon, "%s\n", error_get_pretty(err)); + error_free(err); + return; + } +} + void hmp_migrate_set_downtime(Monitor *mon, const QDict *qdict) { double value = qdict_get_double(qdict, "value"); diff --git a/hmp.h b/hmp.h index 81177b2..371f8d4 100644 --- a/hmp.h +++ b/hmp.h @@ -60,6 +60,7 @@ void hmp_snapshot_delete_blkdev_internal(Monitor *mon, const QDict *qdict); void hmp_drive_mirror(Monitor *mon, const QDict *qdict); void hmp_drive_backup(Monitor *mon, const QDict *qdict); void hmp_migrate_cancel(Monitor *mon, const QDict *qdict); +void hmp_migrate_incoming(Monitor *mon, const QDict *qdict); void hmp_migrate_set_downtime(Monitor *mon, const QDict *qdict); void hmp_migrate_set_speed(Monitor *mon, const QDict *qdict); void hmp_migrate_set_capability(Monitor *mon, const QDict *qdict); diff --git a/migration/migration.c b/migration/migration.c index f3d49d5..2c805f1 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -432,6 +432,25 @@ void migrate_del_blocker(Error *reason) migration_blockers = g_slist_remove(migration_blockers, reason); } +void qmp_migrate_incoming(const char *uri, Error **errp) +{ + Error *local_err 
= NULL; + + if (!deferred_incoming) { + error_setg(errp, "'-incoming defer' is required for migrate_incoming"); + return; + } + + qemu_start_incoming_migration(uri, &local_err); + + if (local_err) { + error_propagate(errp, local_err); + return; + } + + deferred_incoming = false; +} + void qmp_migrate(const char *uri, bool has_blk, bool blk, bool has_inc, bool inc, bool has_detach, bool detach, Error **errp) diff --git a/qapi-schema.json b/qapi-schema.json index 8141f71..2b3e275 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1738,6 +1738,21 @@ { 'command': 'migrate', 'data': {'uri': 'str', '*blk': 'bool', '*inc': 'bool', '*detach': 'bool' } } +## +# @migrate-incoming +# +# Start an incoming migration, the qemu must have been started +# with -incoming defer +# +# @uri: The Uniform Resource Identifier identifying the source or +# address to listen on +# +# Returns: nothing on success +# +# Since: 2.3 +## +{ 'command': 'migrate-incoming', 'data': {'uri': 'str' } } + # @xen-save-devices-state: # # Save the state of all devices to file. 
The RAM and the block devices diff --git a/qmp-commands.hx b/qmp-commands.hx index c12334a..0663924 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -661,7 +661,36 @@ Example: <- { "return": {} } EQMP -{ + + { + .name = "migrate-incoming", + .args_type = "uri:s", + .mhandler.cmd_new = qmp_marshal_input_migrate_incoming, + }, + +SQMP +migrate-incoming +---------------- + +Continue an incoming migration + +Arguments: + +- "uri": Source/listening URI (json-string) + +Example: + +-> { "execute": "migrate-incoming", "arguments": { "uri": "tcp::4446" } } +<- { "return": {} } + +Notes: + +(1) QEMU must be started with -incoming defer to allow migrate-incoming to + be used +(2) The uri format is the same as to -incoming + +EQMP + { .name = "migrate-set-cache-size", .args_type = "value:o", .mhandler.cmd_new = qmp_marshal_input_migrate_set_cache_size, -- cgit v1.1 From 6e1dea46b89e137ee1593ded5566d5371a61d304 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Thu, 12 Feb 2015 19:02:42 +0100 Subject: ram: make all save_page functions take a uint64_t parameter It used to be an int, but then we can't pass directly the bytes_transferred parameter, that would happen later in the series. 
Signed-off-by: Juan Quintela Reviewed-by: Amit Shah --- arch_init.c | 11 ++++++++--- include/migration/migration.h | 2 +- include/migration/qemu-file.h | 2 +- migration/qemu-file.c | 3 ++- migration/rdma.c | 2 +- 5 files changed, 13 insertions(+), 7 deletions(-) diff --git a/arch_init.c b/arch_init.c index 691b5e2..f1551ed 100644 --- a/arch_init.c +++ b/arch_init.c @@ -584,6 +584,7 @@ static int ram_save_page(QEMUFile *f, RAMBlock* block, ram_addr_t offset, bool last_stage) { int bytes_sent; + uint64_t bytes_xmit; int cont; ram_addr_t current_addr; MemoryRegion *mr = block->mr; @@ -597,17 +598,21 @@ static int ram_save_page(QEMUFile *f, RAMBlock* block, ram_addr_t offset, /* In doubt sent page as normal */ bytes_sent = -1; + bytes_xmit = 0; ret = ram_control_save_page(f, block->offset, - offset, TARGET_PAGE_SIZE, &bytes_sent); + offset, TARGET_PAGE_SIZE, &bytes_xmit); + if (bytes_xmit) { + bytes_sent = bytes_xmit; + } XBZRLE_cache_lock(); current_addr = block->offset + offset; if (ret != RAM_SAVE_CONTROL_NOT_SUPP) { if (ret != RAM_SAVE_CONTROL_DELAYED) { - if (bytes_sent > 0) { + if (bytes_xmit > 0) { acct_info.norm_pages++; - } else if (bytes_sent == 0) { + } else if (bytes_xmit == 0) { acct_info.dup_pages++; } } diff --git a/include/migration/migration.h b/include/migration/migration.h index 703b7d7..5e16af6 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -169,6 +169,6 @@ void ram_control_load_hook(QEMUFile *f, uint64_t flags); size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset, ram_addr_t offset, size_t size, - int *bytes_sent); + uint64_t *bytes_sent); #endif diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h index a923cec..94a8c97 100644 --- a/include/migration/qemu-file.h +++ b/include/migration/qemu-file.h @@ -82,7 +82,7 @@ typedef size_t (QEMURamSaveFunc)(QEMUFile *f, void *opaque, ram_addr_t block_offset, ram_addr_t offset, size_t size, - int *bytes_sent); + uint64_t *bytes_sent); /* 
* Stop any read or write (depending on flags) on the underlying diff --git a/migration/qemu-file.c b/migration/qemu-file.c index e66e557..1a4f986 100644 --- a/migration/qemu-file.c +++ b/migration/qemu-file.c @@ -161,7 +161,8 @@ void ram_control_load_hook(QEMUFile *f, uint64_t flags) } size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset, - ram_addr_t offset, size_t size, int *bytes_sent) + ram_addr_t offset, size_t size, + uint64_t *bytes_sent) { if (f->ops->save_page) { int ret = f->ops->save_page(f, f->opaque, block_offset, diff --git a/migration/rdma.c b/migration/rdma.c index 42d443c..d1c19ff 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -2654,7 +2654,7 @@ static int qemu_rdma_close(void *opaque) */ static size_t qemu_rdma_save_page(QEMUFile *f, void *opaque, ram_addr_t block_offset, ram_addr_t offset, - size_t size, int *bytes_sent) + size_t size, uint64_t *bytes_sent) { QEMUFileRDMA *rfile = opaque; RDMAContext *rdma = rfile->rdma; -- cgit v1.1 From 0fcd8d31402bdc7ef7187f005bee8f0799a4cce0 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Thu, 12 Feb 2015 19:33:05 +0100 Subject: ram_find_and_save_block: change calling convention Add a parameter to pass the number of bytes written, and make it return the number of pages written instead. Signed-off-by: Juan Quintela --- arch_init.c | 45 +++++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/arch_init.c b/arch_init.c index f1551ed..df5db66 100644 --- a/arch_init.c +++ b/arch_init.c @@ -654,16 +654,21 @@ static int ram_save_page(QEMUFile *f, RAMBlock* block, ram_addr_t offset, return bytes_sent; } -/* - * ram_find_and_save_block: Finds a page to send and sends it to f +/** + * ram_find_and_save_block: Finds a dirty page and sends it to f * * Called within an RCU critical section. * - * Returns: The number of bytes written. 
+ * Returns: The number of pages written * 0 means no dirty pages + * + * @f: QEMUFile where to send the data + * @last_stage: if we are at the completion stage + * @bytes_transferred: increase it with the number of transferred bytes */ -static int ram_find_and_save_block(QEMUFile *f, bool last_stage) +static int ram_find_and_save_block(QEMUFile *f, bool last_stage, + uint64_t *bytes_transferred) { RAMBlock *block = last_seen_block; ram_addr_t offset = last_offset; @@ -702,7 +707,10 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage) last_seen_block = block; last_offset = offset; - return bytes_sent; + + *bytes_transferred += bytes_sent; + + return (bytes_sent != 0); } static uint64_t bytes_transferred; @@ -886,7 +894,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) int ret; int i; int64_t t0; - int total_sent = 0; + int pages_sent = 0; rcu_read_lock(); if (ram_list.version != last_version) { @@ -901,14 +909,14 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); i = 0; while ((ret = qemu_file_rate_limit(f)) == 0) { - int bytes_sent; + int pages; - bytes_sent = ram_find_and_save_block(f, false); - /* no more blocks to sent */ - if (bytes_sent == 0) { + pages = ram_find_and_save_block(f, false, &bytes_transferred); + /* no more pages to sent */ + if (pages == 0) { break; } - total_sent += bytes_sent; + pages_sent += pages; acct_info.iterations++; check_guest_throttling(); /* we want to check in the 1st loop, just in case it was the 1st time @@ -934,12 +942,6 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) */ ram_control_after_iterate(f, RAM_CONTROL_ROUND); - bytes_transferred += total_sent; - - /* - * Do not count these 8 bytes into total_sent, so that we can - * return 0 if no page had been dirtied. 
- */ qemu_put_be64(f, RAM_SAVE_FLAG_EOS); bytes_transferred += 8; @@ -948,7 +950,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) return ret; } - return total_sent; + return pages_sent; } /* Called with iothread lock */ @@ -964,14 +966,13 @@ static int ram_save_complete(QEMUFile *f, void *opaque) /* flush all remaining blocks regardless of rate limiting */ while (true) { - int bytes_sent; + int pages; - bytes_sent = ram_find_and_save_block(f, true); + pages = ram_find_and_save_block(f, true, &bytes_transferred); /* no more blocks to sent */ - if (bytes_sent == 0) { + if (pages == 0) { break; } - bytes_transferred += bytes_sent; } ram_control_after_iterate(f, RAM_CONTROL_FINISH); -- cgit v1.1 From 87cf878b2ea8d24c78ea3210880538aa31459dfd Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Thu, 12 Feb 2015 20:03:45 +0100 Subject: ram_save_page: change calling convention Add a parameter to pass the number of bytes written, and make it return the number of pages written instead. Signed-off-by: Juan Quintela --- arch_init.c | 57 ++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 19 deletions(-) diff --git a/arch_init.c b/arch_init.c index df5db66..990c88e 100644 --- a/arch_init.c +++ b/arch_init.c @@ -575,15 +575,21 @@ static void migration_bitmap_sync(void) } } -/* +/** * ram_save_page: Send the given page to the stream * - * Returns: Number of bytes written. + * Returns: Number of pages written. 
+ * + * @f: QEMUFile where to send the data + * @block: block that contains the page we want to send + * @offset: offset inside the block for the page + * @last_stage: if we are at the completion stage + * @bytes_transferred: increase it with the number of transferred bytes */ static int ram_save_page(QEMUFile *f, RAMBlock* block, ram_addr_t offset, - bool last_stage) + bool last_stage, uint64_t *bytes_transferred) { - int bytes_sent; + int pages = -1; uint64_t bytes_xmit; int cont; ram_addr_t current_addr; @@ -597,12 +603,12 @@ static int ram_save_page(QEMUFile *f, RAMBlock* block, ram_addr_t offset, p = memory_region_get_ram_ptr(mr) + offset; /* In doubt sent page as normal */ - bytes_sent = -1; bytes_xmit = 0; ret = ram_control_save_page(f, block->offset, offset, TARGET_PAGE_SIZE, &bytes_xmit); if (bytes_xmit) { - bytes_sent = bytes_xmit; + *bytes_transferred += bytes_xmit; + pages = 1; } XBZRLE_cache_lock(); @@ -618,17 +624,29 @@ static int ram_save_page(QEMUFile *f, RAMBlock* block, ram_addr_t offset, } } else if (is_zero_range(p, TARGET_PAGE_SIZE)) { acct_info.dup_pages++; - bytes_sent = save_block_hdr(f, block, offset, cont, - RAM_SAVE_FLAG_COMPRESS); + *bytes_transferred += save_block_hdr(f, block, offset, cont, + RAM_SAVE_FLAG_COMPRESS); qemu_put_byte(f, 0); - bytes_sent++; + *bytes_transferred += 1; + pages = 1; /* Must let xbzrle know, otherwise a previous (now 0'd) cached * page would be stale */ xbzrle_cache_zero_page(current_addr); } else if (!ram_bulk_stage && migrate_use_xbzrle()) { + int bytes_sent; + bytes_sent = save_xbzrle_page(f, &p, current_addr, block, offset, cont, last_stage); + + if (bytes_sent > 0) { + *bytes_transferred += bytes_sent; + pages = 1; + } else if (bytes_sent == 0) { + pages = 0; + } else { + pages = -1; + } if (!last_stage) { /* Can't send this cached data async, since the cache page * might get updated before it gets to the wire @@ -638,20 +656,22 @@ static int ram_save_page(QEMUFile *f, RAMBlock* block, ram_addr_t offset, 
} /* XBZRLE overflow or normal page */ - if (bytes_sent == -1) { - bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE); + if (pages == -1) { + *bytes_transferred += save_block_hdr(f, block, offset, cont, + RAM_SAVE_FLAG_PAGE); if (send_async) { qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE); } else { qemu_put_buffer(f, p, TARGET_PAGE_SIZE); } - bytes_sent += TARGET_PAGE_SIZE; + *bytes_transferred += TARGET_PAGE_SIZE; + pages = 1; acct_info.norm_pages++; } XBZRLE_cache_unlock(); - return bytes_sent; + return pages; } /** @@ -673,7 +693,7 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage, RAMBlock *block = last_seen_block; ram_addr_t offset = last_offset; bool complete_round = false; - int bytes_sent = 0; + int pages = 0; MemoryRegion *mr; if (!block) @@ -695,10 +715,11 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage, ram_bulk_stage = false; } } else { - bytes_sent = ram_save_page(f, block, offset, last_stage); + pages = ram_save_page(f, block, offset, last_stage, + bytes_transferred); /* if page is unmodified, continue to the next */ - if (bytes_sent > 0) { + if (pages > 0) { last_sent_block = block; break; } @@ -708,9 +729,7 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage, last_seen_block = block; last_offset = offset; - *bytes_transferred += bytes_sent; - - return (bytes_sent != 0); + return pages; } static uint64_t bytes_transferred; -- cgit v1.1 From f4be0f75f68ec463d07c65cb2f636e6adf1388e6 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Thu, 12 Feb 2015 20:16:33 +0100 Subject: save_xbzrle_page: change calling convention Add a parameter to pass the number of bytes written, and make it return the number of pages written instead. 
Signed-off-by: Juan Quintela --- arch_init.c | 44 +++++++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/arch_init.c b/arch_init.c index 990c88e..d30fd13 100644 --- a/arch_init.c +++ b/arch_init.c @@ -353,11 +353,27 @@ static void xbzrle_cache_zero_page(ram_addr_t current_addr) #define ENCODING_FLAG_XBZRLE 0x1 +/** + * save_xbzrle_page: compress and send current page + * + * Returns: 1 means that we wrote the page + * 0 means that page is identical to the one already sent + * -1 means that xbzrle would be longer than normal + * + * @f: QEMUFile where to send the data + * @current_data: + * @current_addr: + * @block: block that contains the page we want to send + * @offset: offset inside the block for the page + * @last_stage: if we are at the completion stage + * @bytes_transferred: increase it with the number of transferred bytes + */ static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data, ram_addr_t current_addr, RAMBlock *block, - ram_addr_t offset, int cont, bool last_stage) + ram_addr_t offset, int cont, bool last_stage, + uint64_t *bytes_transferred) { - int encoded_len = 0, bytes_sent = -1; + int encoded_len = 0, bytes_xbzrle; uint8_t *prev_cached_page; if (!cache_is_cached(XBZRLE.cache, current_addr, bitmap_sync_count)) { @@ -404,15 +420,16 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data, } /* Send XBZRLE based compressed page */ - bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE); + bytes_xbzrle = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE); qemu_put_byte(f, ENCODING_FLAG_XBZRLE); qemu_put_be16(f, encoded_len); qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len); - bytes_sent += encoded_len + 1 + 2; + bytes_xbzrle += encoded_len + 1 + 2; acct_info.xbzrle_pages++; - acct_info.xbzrle_bytes += bytes_sent; + acct_info.xbzrle_bytes += bytes_xbzrle; + *bytes_transferred += bytes_xbzrle; - return bytes_sent; + return 1; } static inline @@ 
-634,19 +651,8 @@ static int ram_save_page(QEMUFile *f, RAMBlock* block, ram_addr_t offset, */ xbzrle_cache_zero_page(current_addr); } else if (!ram_bulk_stage && migrate_use_xbzrle()) { - int bytes_sent; - - bytes_sent = save_xbzrle_page(f, &p, current_addr, block, - offset, cont, last_stage); - - if (bytes_sent > 0) { - *bytes_transferred += bytes_sent; - pages = 1; - } else if (bytes_sent == 0) { - pages = 0; - } else { - pages = -1; - } + pages = save_xbzrle_page(f, &p, current_addr, block, + offset, cont, last_stage, bytes_transferred); if (!last_stage) { /* Can't send this cached data async, since the cache page * might get updated before it gets to the wire -- cgit v1.1 From 73bab2fc2adeb18b26752937f3cf60afa9658a9c Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Thu, 12 Feb 2015 21:41:39 +0100 Subject: save_block_hdr: we can recalculate the cont parameter here No need to pass it through all the callers. Once there, update last_sent_block here. Signed-off-by: Juan Quintela --- arch_init.c | 47 ++++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/arch_init.c b/arch_init.c index d30fd13..7377a07 100644 --- a/arch_init.c +++ b/arch_init.c @@ -305,34 +305,39 @@ uint64_t xbzrle_mig_pages_overflow(void) return acct_info.xbzrle_overflows; } +/* This is the last block that we have visited serching for dirty pages + */ +static RAMBlock *last_seen_block; +/* This is the last block from where we have sent data */ +static RAMBlock *last_sent_block; +static ram_addr_t last_offset; +static unsigned long *migration_bitmap; +static uint64_t migration_dirty_pages; +static uint32_t last_version; +static bool ram_bulk_stage; + static size_t save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset, - int cont, int flag) + int flag) { size_t size; - qemu_put_be64(f, offset | cont | flag); + if (block == last_sent_block) { + offset |= RAM_SAVE_FLAG_CONTINUE; + } + + qemu_put_be64(f, offset | flag); size = 8; - if 
(!cont) { + if (block != last_sent_block) { qemu_put_byte(f, strlen(block->idstr)); qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr)); size += 1 + strlen(block->idstr); + last_sent_block = block; } return size; } -/* This is the last block that we have visited serching for dirty pages - */ -static RAMBlock *last_seen_block; -/* This is the last block from where we have sent data */ -static RAMBlock *last_sent_block; -static ram_addr_t last_offset; -static unsigned long *migration_bitmap; -static uint64_t migration_dirty_pages; -static uint32_t last_version; -static bool ram_bulk_stage; - /* Update the xbzrle cache to reflect a page that's been sent as all 0. * The important thing is that a stale (not-yet-0'd) page be replaced * by the new data. @@ -370,7 +375,7 @@ static void xbzrle_cache_zero_page(ram_addr_t current_addr) */ static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data, ram_addr_t current_addr, RAMBlock *block, - ram_addr_t offset, int cont, bool last_stage, + ram_addr_t offset, bool last_stage, uint64_t *bytes_transferred) { int encoded_len = 0, bytes_xbzrle; @@ -420,7 +425,7 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data, } /* Send XBZRLE based compressed page */ - bytes_xbzrle = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE); + bytes_xbzrle = save_block_hdr(f, block, offset, RAM_SAVE_FLAG_XBZRLE); qemu_put_byte(f, ENCODING_FLAG_XBZRLE); qemu_put_be16(f, encoded_len); qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len); @@ -608,15 +613,12 @@ static int ram_save_page(QEMUFile *f, RAMBlock* block, ram_addr_t offset, { int pages = -1; uint64_t bytes_xmit; - int cont; ram_addr_t current_addr; MemoryRegion *mr = block->mr; uint8_t *p; int ret; bool send_async = true; - cont = (block == last_sent_block) ? 
RAM_SAVE_FLAG_CONTINUE : 0; - p = memory_region_get_ram_ptr(mr) + offset; /* In doubt sent page as normal */ @@ -641,7 +643,7 @@ static int ram_save_page(QEMUFile *f, RAMBlock* block, ram_addr_t offset, } } else if (is_zero_range(p, TARGET_PAGE_SIZE)) { acct_info.dup_pages++; - *bytes_transferred += save_block_hdr(f, block, offset, cont, + *bytes_transferred += save_block_hdr(f, block, offset, RAM_SAVE_FLAG_COMPRESS); qemu_put_byte(f, 0); *bytes_transferred += 1; @@ -652,7 +654,7 @@ static int ram_save_page(QEMUFile *f, RAMBlock* block, ram_addr_t offset, xbzrle_cache_zero_page(current_addr); } else if (!ram_bulk_stage && migrate_use_xbzrle()) { pages = save_xbzrle_page(f, &p, current_addr, block, - offset, cont, last_stage, bytes_transferred); + offset, last_stage, bytes_transferred); if (!last_stage) { /* Can't send this cached data async, since the cache page * might get updated before it gets to the wire @@ -663,7 +665,7 @@ static int ram_save_page(QEMUFile *f, RAMBlock* block, ram_addr_t offset, /* XBZRLE overflow or normal page */ if (pages == -1) { - *bytes_transferred += save_block_hdr(f, block, offset, cont, + *bytes_transferred += save_block_hdr(f, block, offset, RAM_SAVE_FLAG_PAGE); if (send_async) { qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE); @@ -726,7 +728,6 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage, /* if page is unmodified, continue to the next */ if (pages > 0) { - last_sent_block = block; break; } } -- cgit v1.1 From f6f14c58d542b306b6a0ff207db793d0aba62aa1 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Thu, 12 Feb 2015 21:46:40 +0100 Subject: rename save_block_hdr to save_page_header It has always been a page header, not a block header. Once there, the flag argument was only passed to make a bit or with it, just do the or on the caller. 
Signed-off-by: Juan Quintela --- arch_init.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/arch_init.c b/arch_init.c index 7377a07..c3f7d3f 100644 --- a/arch_init.c +++ b/arch_init.c @@ -316,8 +316,19 @@ static uint64_t migration_dirty_pages; static uint32_t last_version; static bool ram_bulk_stage; -static size_t save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset, - int flag) +/** + * save_page_header: Write page header to wire + * + * If this is the 1st block, it also writes the block identification + * + * Returns: Number of bytes written + * + * @f: QEMUFile where to send the data + * @block: block that contains the page we want to send + * @offset: offset inside the block for the page + * in the lower bits, it contains flags + */ +static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset) { size_t size; @@ -325,7 +336,7 @@ static size_t save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset, offset |= RAM_SAVE_FLAG_CONTINUE; } - qemu_put_be64(f, offset | flag); + qemu_put_be64(f, offset); size = 8; if (block != last_sent_block) { @@ -425,7 +436,7 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data, } /* Send XBZRLE based compressed page */ - bytes_xbzrle = save_block_hdr(f, block, offset, RAM_SAVE_FLAG_XBZRLE); + bytes_xbzrle = save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE); qemu_put_byte(f, ENCODING_FLAG_XBZRLE); qemu_put_be16(f, encoded_len); qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len); @@ -643,8 +654,8 @@ static int ram_save_page(QEMUFile *f, RAMBlock* block, ram_addr_t offset, } } else if (is_zero_range(p, TARGET_PAGE_SIZE)) { acct_info.dup_pages++; - *bytes_transferred += save_block_hdr(f, block, offset, - RAM_SAVE_FLAG_COMPRESS); + *bytes_transferred += save_page_header(f, block, + offset | RAM_SAVE_FLAG_COMPRESS); qemu_put_byte(f, 0); *bytes_transferred += 1; pages = 1; @@ -665,8 +676,8 @@ static int ram_save_page(QEMUFile *f, 
RAMBlock* block, ram_addr_t offset, /* XBZRLE overflow or normal page */ if (pages == -1) { - *bytes_transferred += save_block_hdr(f, block, offset, - RAM_SAVE_FLAG_PAGE); + *bytes_transferred += save_page_header(f, block, + offset | RAM_SAVE_FLAG_PAGE); if (send_async) { qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE); } else { -- cgit v1.1 From 1925cebc4b12903e3a66106893fd97e35c5d0baf Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 23 Feb 2015 13:56:41 +0100 Subject: migration: Read JSON VM description on incoming migration One of the really nice things about the VM description format is that it goes over the wire when live migration is happening. Unfortunately QEMU today closes any socket once it sees VM_EOF coming, so we never give the VMDESC the chance to actually land on the wire. This patch makes QEMU read the description as well. This way we ensure that anything wire tapping us in between will get the chance to also interpret the stream. Along the way we also fix virt tests that assume that number_bytes_sent on the sender side is equal to number_bytes_read which was true before the VMDESC patches and is true again with this patch. Signed-off-by: Alexander Graf Reviewed-by: Dr. David Alan Gilbert Tested-by: Dr. David Alan Gilbert Signed-off-by: Juan Quintela --- savevm.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/savevm.c b/savevm.c index ce2b6a2..54510e9 100644 --- a/savevm.c +++ b/savevm.c @@ -930,6 +930,7 @@ int qemu_loadvm_state(QEMUFile *f) uint8_t section_type; unsigned int v; int ret; + int file_error_after_eof = -1; if (qemu_savevm_state_blocked(&local_err)) { error_report("%s", error_get_pretty(local_err)); @@ -1035,6 +1036,24 @@ int qemu_loadvm_state(QEMUFile *f) } } + file_error_after_eof = qemu_file_get_error(f); + + /* + * Try to read in the VMDESC section as well, so that dumping tools that + * intercept our migration stream have the chance to see it. 
+ */ + if (qemu_get_byte(f) == QEMU_VM_VMDESCRIPTION) { + uint32_t size = qemu_get_be32(f); + uint8_t *buf = g_malloc(0x1000); + + while (size > 0) { + uint32_t read_chunk = MIN(size, 0x1000); + qemu_get_buffer(f, buf, read_chunk); + size -= read_chunk; + } + g_free(buf); + } + cpu_synchronize_all_post_init(); ret = 0; @@ -1046,7 +1065,8 @@ out: } if (ret == 0) { - ret = qemu_file_get_error(f); + /* We may not have a VMDESC section, so ignore relative errors */ + ret = file_error_after_eof; } return ret; -- cgit v1.1 From 9850c6047b8b4343e91da4780a41bb88faeb018a Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 23 Feb 2015 13:56:42 +0100 Subject: migration: Allow to suppress vmdesc submission We now always send a JSON blob describing the migration file format as part of the migration stream. However, some tools built around QEMU have proven to stumble over this. This patch gives the user the chance to disable said self-describing part of the migration stream. To disable vmdesc submission, just add -machine suppress-vmdesc=on to your QEMU command line. 
Signed-off-by: Alexander Graf Signed-off-by: Juan Quintela --- hw/core/machine.c | 20 ++++++++++++++++++++ include/hw/boards.h | 1 + qemu-options.hx | 3 ++- savevm.c | 14 +++++++++++--- 4 files changed, 34 insertions(+), 4 deletions(-) diff --git a/hw/core/machine.c b/hw/core/machine.c index e3a3e2a..cb1185a 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -254,6 +254,20 @@ static void machine_set_iommu(Object *obj, bool value, Error **errp) ms->iommu = value; } +static void machine_set_suppress_vmdesc(Object *obj, bool value, Error **errp) +{ + MachineState *ms = MACHINE(obj); + + ms->suppress_vmdesc = value; +} + +static bool machine_get_suppress_vmdesc(Object *obj, Error **errp) +{ + MachineState *ms = MACHINE(obj); + + return ms->suppress_vmdesc; +} + static int error_on_sysbus_device(SysBusDevice *sbdev, void *opaque) { error_report("Option '-device %s' cannot be handled by this machine", @@ -377,6 +391,12 @@ static void machine_initfn(Object *obj) object_property_set_description(obj, "iommu", "Set on/off to enable/disable Intel IOMMU (VT-d)", NULL); + object_property_add_bool(obj, "suppress-vmdesc", + machine_get_suppress_vmdesc, + machine_set_suppress_vmdesc, NULL); + object_property_set_description(obj, "suppress-vmdesc", + "Set on to disable self-describing migration", + NULL); /* Register notifier when init is done for sysbus sanity checks */ ms->sysbus_notifier.notify = machine_init_notify; diff --git a/include/hw/boards.h b/include/hw/boards.h index f44d6f5..1feea2b 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -143,6 +143,7 @@ struct MachineState { bool usb; char *firmware; bool iommu; + bool suppress_vmdesc; ram_addr_t ram_size; ram_addr_t maxram_size; diff --git a/qemu-options.hx b/qemu-options.hx index ad07dde..c513352 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -39,7 +39,8 @@ DEF("machine", HAS_ARG, QEMU_OPTION_machine, \ " mem-merge=on|off controls memory merge support (default: on)\n" " iommu=on|off controls 
emulated Intel IOMMU (VT-d) support (default=off)\n" " aes-key-wrap=on|off controls support for AES key wrapping (default=on)\n" - " dea-key-wrap=on|off controls support for DEA key wrapping (default=on)\n", + " dea-key-wrap=on|off controls support for DEA key wrapping (default=on)\n" + " suppress-vmdesc=on|off disables self-describing migration (default=off)\n", QEMU_ARCH_ALL) STEXI @item -machine [type=]@var{name}[,prop=@var{value}[,...]] diff --git a/savevm.c b/savevm.c index 54510e9..e7d97ee 100644 --- a/savevm.c +++ b/savevm.c @@ -710,6 +710,12 @@ int qemu_savevm_state_iterate(QEMUFile *f) return ret; } +static bool should_send_vmdesc(void) +{ + MachineState *machine = MACHINE(qdev_get_machine()); + return !machine->suppress_vmdesc; +} + void qemu_savevm_state_complete(QEMUFile *f) { QJSON *vmdesc; @@ -782,9 +788,11 @@ void qemu_savevm_state_complete(QEMUFile *f) qjson_finish(vmdesc); vmdesc_len = strlen(qjson_get_str(vmdesc)); - qemu_put_byte(f, QEMU_VM_VMDESCRIPTION); - qemu_put_be32(f, vmdesc_len); - qemu_put_buffer(f, (uint8_t *)qjson_get_str(vmdesc), vmdesc_len); + if (should_send_vmdesc()) { + qemu_put_byte(f, QEMU_VM_VMDESCRIPTION); + qemu_put_be32(f, vmdesc_len); + qemu_put_buffer(f, (uint8_t *)qjson_get_str(vmdesc), vmdesc_len); + } object_unref(OBJECT(vmdesc)); qemu_fflush(f); -- cgit v1.1 From 54ed388b29794ab08089f1b5c7b0a03d075c3b5d Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 23 Feb 2015 13:56:43 +0100 Subject: pc: Disable vmdesc submission for old machines Older PC machine types might by accident be backwards live migration compatible, but with the new vmdesc self-describing blob in our live migration stream we would break that compatibility. Also users wouldn't expect massive behavioral differences when updating to a new version of QEMU while retaining their old machine type, especially not potential breakage in tooling around live migration. So disable vmdesc submission for old PC machine types. 
Signed-off-by: Alexander Graf Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Juan Quintela --- hw/i386/pc_piix.c | 1 + hw/i386/pc_q35.c | 1 + 2 files changed, 2 insertions(+) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 8eab4ba..36c69d7 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -335,6 +335,7 @@ static void pc_compat_2_2(MachineState *machine) CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_RTM, 0); x86_cpu_compat_set_features("Broadwell", FEAT_7_0_EBX, CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_RTM, 0); + machine->suppress_vmdesc = true; } static void pc_compat_2_1(MachineState *machine) diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index c0f21fe..bc40537 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -314,6 +314,7 @@ static void pc_compat_2_2(MachineState *machine) CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_RTM, 0); x86_cpu_compat_set_features("Broadwell", FEAT_7_0_EBX, CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_RTM, 0); + machine->suppress_vmdesc = true; } static void pc_compat_2_1(MachineState *machine) -- cgit v1.1