diff options
author | Peter Maydell <peter.maydell@linaro.org> | 2022-01-29 15:55:54 +0000 |
---|---|---|
committer | Peter Maydell <peter.maydell@linaro.org> | 2022-01-29 15:55:54 +0000 |
commit | d90e6f665d3ac197f83d93ad37147fe677521209 (patch) | |
tree | aeda4e790320cbf95554bc189012c4186a781396 | |
parent | 95a6af2a006e7160c958215c20e513ed29a0a76c (diff) | |
parent | 476ebf77fe8909ded10046edf26685bc28438162 (diff) | |
download | qemu-d90e6f665d3ac197f83d93ad37147fe677521209.zip qemu-d90e6f665d3ac197f83d93ad37147fe677521209.tar.gz qemu-d90e6f665d3ac197f83d93ad37147fe677521209.tar.bz2 |
Merge remote-tracking branch 'remotes/quintela-gitlab/tags/migration-20220128-pull-request' into staging
Migration Pull request (Take 2)
Hi
This time I have disabled vmstate canary patches form Dave Gilbert.
Let's see if it works.
Later, Juan.
# gpg: Signature made Fri 28 Jan 2022 18:30:25 GMT
# gpg: using RSA key 1899FF8EDEBF58CCEE034B82F487EF185872D723
# gpg: Good signature from "Juan Quintela <quintela@redhat.com>" [full]
# gpg: aka "Juan Quintela <quintela@trasno.org>" [full]
# Primary key fingerprint: 1899 FF8E DEBF 58CC EE03 4B82 F487 EF18 5872 D723
* remotes/quintela-gitlab/tags/migration-20220128-pull-request: (36 commits)
migration: Move temp page setup and cleanup into separate functions
migration: Simplify unqueue_page()
migration: Add postcopy_has_request()
migration: Enable UFFD_FEATURE_THREAD_ID even without blocktime feat
migration: No off-by-one for pss->page update in host page size
migration: Tally pre-copy, downtime and post-copy bytes independently
migration: Introduce ram_transferred_add()
migration: Don't return for postcopy_send_discard_bm_ram()
migration: Drop return code for disgard ram process
migration: Do chunk page in postcopy_each_ram_send_discard()
migration: Drop postcopy_chunk_hostpages()
migration: Don't return for postcopy_chunk_hostpages()
migration: Drop dead code of ram_debug_dump_bitmap()
migration/ram: clean up unused comment.
migration: Report the error returned when save_live_iterate fails
migration/migration.c: Remove the MIGRATION_STATUS_ACTIVE when migration finished
migration/migration.c: Avoid COLO boot in postcopy migration
migration/migration.c: Add missed default error handler for migration state
Remove unnecessary minimum_version_id_old fields
multifd: Rename pages_used to normal_pages
...
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
38 files changed, 351 insertions, 469 deletions
diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c index b20903e..3646dbf 100644 --- a/hw/acpi/cpu.c +++ b/hw/acpi/cpu.c @@ -297,7 +297,6 @@ static const VMStateDescription vmstate_cpuhp_sts = { .name = "CPU hotplug device state", .version_id = 1, .minimum_version_id = 1, - .minimum_version_id_old = 1, .fields = (VMStateField[]) { VMSTATE_BOOL(is_inserting, AcpiCpuStatus), VMSTATE_BOOL(is_removing, AcpiCpuStatus), @@ -311,7 +310,6 @@ const VMStateDescription vmstate_cpu_hotplug = { .name = "CPU hotplug state", .version_id = 1, .minimum_version_id = 1, - .minimum_version_id_old = 1, .fields = (VMStateField[]) { VMSTATE_UINT32(selector, CPUHotplugState), VMSTATE_UINT8(command, CPUHotplugState), diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c index ebe08ed..bd9bbad 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -163,7 +163,6 @@ static const VMStateDescription vmstate_memhp_state = { .name = "ich9_pm/memhp", .version_id = 1, .minimum_version_id = 1, - .minimum_version_id_old = 1, .needed = vmstate_test_use_memhp, .fields = (VMStateField[]) { VMSTATE_MEMORY_HOTPLUG(acpi_memory_hotplug, ICH9LPCPMRegs), @@ -181,7 +180,6 @@ static const VMStateDescription vmstate_tco_io_state = { .name = "ich9_pm/tco", .version_id = 1, .minimum_version_id = 1, - .minimum_version_id_old = 1, .needed = vmstate_test_use_tco, .fields = (VMStateField[]) { VMSTATE_STRUCT(tco_regs, ICH9LPCPMRegs, 1, vmstate_tco_io_sts, @@ -208,7 +206,6 @@ static const VMStateDescription vmstate_cpuhp_state = { .name = "ich9_pm/cpuhp", .version_id = 1, .minimum_version_id = 1, - .minimum_version_id_old = 1, .needed = vmstate_test_use_cpuhp, .pre_load = vmstate_cpuhp_pre_load, .fields = (VMStateField[]) { diff --git a/hw/acpi/memory_hotplug.c b/hw/acpi/memory_hotplug.c index d0fffcf..a581a21 100644 --- a/hw/acpi/memory_hotplug.c +++ b/hw/acpi/memory_hotplug.c @@ -318,7 +318,6 @@ static const VMStateDescription vmstate_memhp_sts = { .name = "memory hotplug device state", .version_id = 1, .minimum_version_id = 1, - .minimum_version_id_old = 1, .fields = (VMStateField[]) { VMSTATE_BOOL(is_enabled, MemStatus), VMSTATE_BOOL(is_inserting, MemStatus), @@ -332,7 +331,6 @@ const VMStateDescription vmstate_memory_hotplug = { .name = "memory hotplug state", .version_id = 1, .minimum_version_id = 1, - .minimum_version_id_old = 1, .fields = (VMStateField[]) { VMSTATE_UINT32(selector, MemHotplugState), VMSTATE_STRUCT_VARRAY_POINTER_UINT32(devs, MemHotplugState, dev_count, diff --git a/hw/acpi/piix4.c b/hw/acpi/piix4.c index f0b5fac..cc37fa3 100644 --- a/hw/acpi/piix4.c +++ b/hw/acpi/piix4.c @@ -230,7 +230,6 @@ static const VMStateDescription vmstate_memhp_state = { .name = "piix4_pm/memhp", .version_id = 1, .minimum_version_id = 1, - .minimum_version_id_old = 1, .needed = vmstate_test_use_memhp, .fields = (VMStateField[]) { VMSTATE_MEMORY_HOTPLUG(acpi_memory_hotplug, PIIX4PMState), @@ -255,7 +254,6 @@ static const VMStateDescription vmstate_cpuhp_state = { .name = "piix4_pm/cpuhp", .version_id = 1, .minimum_version_id = 1, - .minimum_version_id_old = 1, .needed = vmstate_test_use_cpuhp, .pre_load = vmstate_cpuhp_pre_load, .fields = (VMStateField[]) { diff --git a/hw/acpi/tco.c b/hw/acpi/tco.c index cf1e68a..4783721 100644 --- a/hw/acpi/tco.c +++ b/hw/acpi/tco.c @@ -239,7 +239,6 @@ const VMStateDescription vmstate_tco_io_sts = { .name = "tco io device status", .version_id = 1, .minimum_version_id = 1, - .minimum_version_id_old = 1, .fields = (VMStateField[]) { VMSTATE_UINT16(tco.rld, TCOIORegs), VMSTATE_UINT8(tco.din, TCOIORegs), diff --git a/hw/audio/pcspk.c b/hw/audio/pcspk.c index b056c05..dfc7ebc 100644 --- a/hw/audio/pcspk.c +++ b/hw/audio/pcspk.c @@ -209,7 +209,6 @@ static const VMStateDescription vmstate_spk = { .name = "pcspk", .version_id = 1, .minimum_version_id = 1, - .minimum_version_id_old = 1, .needed = migrate_needed, .fields = (VMStateField[]) { VMSTATE_UINT8(data_on, PCSpkState), diff --git a/hw/display/macfb.c b/hw/display/macfb.c index 4bd7c3a..2eeb80c 100644 --- a/hw/display/macfb.c +++ b/hw/display/macfb.c @@ -616,7 +616,6 @@ static const VMStateDescription vmstate_macfb = { .name = "macfb", .version_id = 1, .minimum_version_id = 1, - .minimum_version_id_old = 1, .post_load = macfb_post_load, .fields = (VMStateField[]) { VMSTATE_UINT8_ARRAY(color_palette, MacfbState, 256 * 3), diff --git a/hw/dma/xlnx-zdma.c b/hw/dma/xlnx-zdma.c index a5a92b4..4eb7f66 100644 --- a/hw/dma/xlnx-zdma.c +++ b/hw/dma/xlnx-zdma.c @@ -806,7 +806,6 @@ static const VMStateDescription vmstate_zdma = { .name = TYPE_XLNX_ZDMA, .version_id = 1, .minimum_version_id = 1, - .minimum_version_id_old = 1, .fields = (VMStateField[]) { VMSTATE_UINT32_ARRAY(regs, XlnxZDMA, ZDMA_R_MAX), VMSTATE_UINT32(state, XlnxZDMA), diff --git a/hw/dma/xlnx_csu_dma.c b/hw/dma/xlnx_csu_dma.c index 095f954..84f782f 100644 --- a/hw/dma/xlnx_csu_dma.c +++ b/hw/dma/xlnx_csu_dma.c @@ -677,7 +677,6 @@ static const VMStateDescription vmstate_xlnx_csu_dma = { .name = TYPE_XLNX_CSU_DMA, .version_id = 0, .minimum_version_id = 0, - .minimum_version_id_old = 0, .fields = (VMStateField[]) { VMSTATE_PTIMER(src_timer, XlnxCSUDMA), VMSTATE_UINT16(width, XlnxCSUDMA), diff --git a/hw/gpio/imx_gpio.c b/hw/gpio/imx_gpio.c index 7a59180..c7f98b7 100644 --- a/hw/gpio/imx_gpio.c +++ b/hw/gpio/imx_gpio.c @@ -277,7 +277,6 @@ static const VMStateDescription vmstate_imx_gpio = { .name = TYPE_IMX_GPIO, .version_id = 1, .minimum_version_id = 1, - .minimum_version_id_old = 1, .fields = (VMStateField[]) { VMSTATE_UINT32(dr, IMXGPIOState), VMSTATE_UINT32(gdir, IMXGPIOState), diff --git a/hw/misc/bcm2835_mbox.c b/hw/misc/bcm2835_mbox.c index 9f73cbd..04e53c9 100644 --- a/hw/misc/bcm2835_mbox.c +++ b/hw/misc/bcm2835_mbox.c @@ -271,7 +271,6 @@ static const VMStateDescription vmstate_bcm2835_mbox = { .name = TYPE_BCM2835_MBOX, .version_id = 1, .minimum_version_id = 1, - .minimum_version_id_old = 1, .fields = (VMStateField[]) { VMSTATE_BOOL_ARRAY(available, BCM2835MboxState, MBOX_CHAN_COUNT), VMSTATE_STRUCT_ARRAY(mbox, BCM2835MboxState, 2, 1, diff --git a/hw/net/can/can_kvaser_pci.c b/hw/net/can/can_kvaser_pci.c index 168b3a6..94b3a53 100644 --- a/hw/net/can/can_kvaser_pci.c +++ b/hw/net/can/can_kvaser_pci.c @@ -266,7 +266,6 @@ static const VMStateDescription vmstate_kvaser_pci = { .name = "kvaser_pci", .version_id = 1, .minimum_version_id = 1, - .minimum_version_id_old = 1, .fields = (VMStateField[]) { VMSTATE_PCI_DEVICE(dev, KvaserPCIState), /* Load this before sja_state. */ diff --git a/hw/net/can/can_mioe3680_pci.c b/hw/net/can/can_mioe3680_pci.c index 7a79e26..29dc696 100644 --- a/hw/net/can/can_mioe3680_pci.c +++ b/hw/net/can/can_mioe3680_pci.c @@ -203,7 +203,6 @@ static const VMStateDescription vmstate_mioe3680_pci = { .name = "mioe3680_pci", .version_id = 1, .minimum_version_id = 1, - .minimum_version_id_old = 1, .fields = (VMStateField[]) { VMSTATE_PCI_DEVICE(dev, Mioe3680PCIState), VMSTATE_STRUCT(sja_state[0], Mioe3680PCIState, 0, vmstate_can_sja, diff --git a/hw/net/can/can_pcm3680_pci.c b/hw/net/can/can_pcm3680_pci.c index 8ef4e74..e8e57f4 100644 --- a/hw/net/can/can_pcm3680_pci.c +++ b/hw/net/can/can_pcm3680_pci.c @@ -204,7 +204,6 @@ static const VMStateDescription vmstate_pcm3680i_pci = { .name = "pcm3680i_pci", .version_id = 1, .minimum_version_id = 1, - .minimum_version_id_old = 1, .fields = (VMStateField[]) { VMSTATE_PCI_DEVICE(dev, Pcm3680iPCIState), VMSTATE_STRUCT(sja_state[0], Pcm3680iPCIState, 0, diff --git a/hw/net/can/can_sja1000.c b/hw/net/can/can_sja1000.c index 34eea68..3ba803e 100644 --- a/hw/net/can/can_sja1000.c +++ b/hw/net/can/can_sja1000.c @@ -928,7 +928,6 @@ const VMStateDescription vmstate_qemu_can_filter = { .name = "qemu_can_filter", .version_id = 1, .minimum_version_id = 1, - .minimum_version_id_old = 1, .fields = (VMStateField[]) { VMSTATE_UINT32(can_id, qemu_can_filter), VMSTATE_UINT32(can_mask, qemu_can_filter), @@ -952,7 +951,6 @@ const VMStateDescription vmstate_can_sja = { .name = "can_sja", .version_id = 1, .minimum_version_id = 1, - .minimum_version_id_old = 1, .post_load = can_sja_post_load, .fields = (VMStateField[]) { VMSTATE_UINT8(mode, CanSJA1000State), diff --git a/hw/net/can/ctucan_core.c b/hw/net/can/ctucan_core.c index d171c37..f2c3b6a 100644 --- a/hw/net/can/ctucan_core.c +++ b/hw/net/can/ctucan_core.c @@ -617,7 +617,6 @@ const VMStateDescription vmstate_qemu_ctucan_tx_buffer = { .name = "qemu_ctucan_tx_buffer", .version_id = 1, .minimum_version_id = 1, - .minimum_version_id_old = 1, .fields = (VMStateField[]) { VMSTATE_UINT8_ARRAY(data, CtuCanCoreMsgBuffer, CTUCAN_CORE_MSG_MAX_LEN), VMSTATE_END_OF_LIST() @@ -636,7 +635,6 @@ const VMStateDescription vmstate_ctucan = { .name = "ctucan", .version_id = 1, .minimum_version_id = 1, - .minimum_version_id_old = 1, .post_load = ctucan_post_load, .fields = (VMStateField[]) { VMSTATE_UINT32(mode_settings.u32, CtuCanCoreState), diff --git a/hw/net/can/ctucan_pci.c b/hw/net/can/ctucan_pci.c index f1c86cd..50f4ea6 100644 --- a/hw/net/can/ctucan_pci.c +++ b/hw/net/can/ctucan_pci.c @@ -215,7 +215,6 @@ static const VMStateDescription vmstate_ctucan_pci = { .name = "ctucan_pci", .version_id = 1, .minimum_version_id = 1, - .minimum_version_id_old = 1, .fields = (VMStateField[]) { VMSTATE_PCI_DEVICE(dev, CtuCanPCIState), VMSTATE_STRUCT(ctucan_state[0], CtuCanPCIState, 0, vmstate_ctucan, diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c index bb5bee9..462c87d 100644 --- a/hw/ppc/ppc.c +++ b/hw/ppc/ppc.c @@ -1049,7 +1049,6 @@ const VMStateDescription vmstate_ppc_timebase = { .name = "timebase", .version_id = 1, .minimum_version_id = 1, - .minimum_version_id_old = 1, .pre_save = timebase_pre_save, .fields = (VMStateField []) { VMSTATE_UINT64(guest_timebase, PPCTimebase), diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c index f638ff8..cd43945 100644 --- a/hw/scsi/megasas.c +++ b/hw/scsi/megasas.c @@ -2315,7 +2315,6 @@ static const VMStateDescription vmstate_megasas_gen2 = { .name = "megasas-gen2", .version_id = 0, .minimum_version_id = 0, - .minimum_version_id_old = 0, .fields = (VMStateField[]) { VMSTATE_PCI_DEVICE(parent_obj, MegasasState), VMSTATE_MSIX(parent_obj, MegasasState), diff --git a/hw/scsi/mptsas.c b/hw/scsi/mptsas.c index 5181b0c..706cf0d 100644 --- a/hw/scsi/mptsas.c +++ b/hw/scsi/mptsas.c @@ -1363,7 +1363,6 @@ static const VMStateDescription vmstate_mptsas = { .name = "mptsas", .version_id = 0, .minimum_version_id = 0, - .minimum_version_id_old = 0, .post_load = mptsas_post_load, .fields = (VMStateField[]) { VMSTATE_PCI_DEVICE(dev, MPTSASState), diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c index 72da12f..688eccd 100644 --- a/hw/virtio/virtio-mmio.c +++ b/hw/virtio/virtio-mmio.c @@ -592,7 +592,6 @@ static const VMStateDescription vmstate_virtio_mmio = { .name = "virtio_mmio", .version_id = 1, .minimum_version_id = 1, - .minimum_version_id_old = 1, .fields = (VMStateField[]) { VMSTATE_END_OF_LIST() }, diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 750aa47..f9cf959 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -131,7 +131,6 @@ static const VMStateDescription vmstate_virtio_pci = { .name = "virtio_pci", .version_id = 1, .minimum_version_id = 1, - .minimum_version_id_old = 1, .fields = (VMStateField[]) { VMSTATE_END_OF_LIST() }, diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index aae72fb..9e8f51d 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -2808,7 +2808,6 @@ static const VMStateDescription vmstate_virtio = { .name = "virtio", .version_id = 1, .minimum_version_id = 1, - .minimum_version_id_old = 1, .fields = (VMStateField[]) { VMSTATE_END_OF_LIST() }, diff --git a/migration/migration.c b/migration/migration.c index 0652165..bcc385b 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1014,6 +1014,9 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) info->ram->page_size = page_size; info->ram->multifd_bytes = ram_counters.multifd_bytes; info->ram->pages_per_second = s->pages_per_second; + info->ram->precopy_bytes = ram_counters.precopy_bytes; + info->ram->downtime_bytes = ram_counters.downtime_bytes; + info->ram->postcopy_bytes = ram_counters.postcopy_bytes; if (migrate_use_xbzrle()) { info->has_xbzrle_cache = true; @@ -2991,10 +2994,7 @@ static int postcopy_start(MigrationState *ms) * that are dirty */ if (migrate_postcopy_ram()) { - if (ram_postcopy_send_discard_bitmap(ms)) { - error_report("postcopy send discard bitmap failed"); - goto fail; - } + ram_postcopy_send_discard_bitmap(ms); } /* @@ -3205,7 +3205,7 @@ static void migration_completion(MigrationState *s) qemu_mutex_unlock_iothread(); trace_migration_completion_postcopy_end_after_complete(); - } else if (s->state == MIGRATION_STATUS_CANCELLING) { + } else { goto fail; } @@ -3230,7 +3230,11 @@ static void migration_completion(MigrationState *s) goto fail_invalidate; } - if (!migrate_colo_enabled()) { + if (migrate_colo_enabled() && s->state == MIGRATION_STATUS_ACTIVE) { + /* COLO does not support postcopy */ + migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE, + MIGRATION_STATUS_COLO); + } else { migrate_set_state(&s->state, current_active_state, MIGRATION_STATUS_COMPLETED); } @@ -3621,16 +3625,6 @@ static void migration_iteration_finish(MigrationState *s) "COLO enabled", __func__); } migrate_start_colo_process(s); - /* - * Fixme: we will run VM in COLO no matter its old running state. - * After exited COLO, we will keep running. - */ - /* Fallthrough */ - case MIGRATION_STATUS_ACTIVE: - /* - * We should really assert here, but since it's during - * migration, let's try to reduce the usage of assertions. - */ s->vm_was_running = true; /* Fallthrough */ case MIGRATION_STATUS_FAILED: diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c index da62017..aba1c88 100644 --- a/migration/multifd-zlib.c +++ b/migration/multifd-zlib.c @@ -51,16 +51,16 @@ static int zlib_send_setup(MultiFDSendParams *p, Error **errp) zs->opaque = Z_NULL; if (deflateInit(zs, migrate_multifd_zlib_level()) != Z_OK) { g_free(z); - error_setg(errp, "multifd %d: deflate init failed", p->id); + error_setg(errp, "multifd %u: deflate init failed", p->id); return -1; } - /* To be safe, we reserve twice the size of the packet */ - z->zbuff_len = MULTIFD_PACKET_SIZE * 2; + /* This is the maxium size of the compressed buffer */ + z->zbuff_len = compressBound(MULTIFD_PACKET_SIZE); z->zbuff = g_try_malloc(z->zbuff_len); if (!z->zbuff) { deflateEnd(&z->zs); g_free(z); - error_setg(errp, "multifd %d: out of memory for zbuff", p->id); + error_setg(errp, "multifd %u: out of memory for zbuff", p->id); return -1; } p->data = z; @@ -106,16 +106,16 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) int ret; uint32_t i; - for (i = 0; i < p->pages->num; i++) { + for (i = 0; i < p->normal_num; i++) { uint32_t available = z->zbuff_len - out_size; int flush = Z_NO_FLUSH; - if (i == p->pages->num - 1) { + if (i == p->normal_num - 1) { flush = Z_SYNC_FLUSH; } zs->avail_in = page_size; - zs->next_in = p->pages->block->host + p->pages->offset[i]; + zs->next_in = p->pages->block->host + p->normal[i]; zs->avail_out = available; zs->next_out = z->zbuff + out_size; @@ -132,17 +132,20 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) ret = deflate(zs, flush); } while (ret == Z_OK && zs->avail_in && zs->avail_out); if (ret == Z_OK && zs->avail_in) { - error_setg(errp, "multifd %d: deflate failed to compress all input", + error_setg(errp, "multifd %u: deflate failed to compress all input", p->id); return -1; } if (ret != Z_OK) { - error_setg(errp, "multifd %d: deflate returned %d instead of Z_OK", + error_setg(errp, "multifd %u: deflate returned %d instead of Z_OK", p->id, ret); return -1; } out_size += available - zs->avail_out; } + p->iov[p->iovs_num].iov_base = z->zbuff; + p->iov[p->iovs_num].iov_len = out_size; + p->iovs_num++; p->next_packet_size = out_size; p->flags |= MULTIFD_FLAG_ZLIB; @@ -150,25 +153,6 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) } /** - * zlib_send_write: do the actual write of the data - * - * Do the actual write of the comprresed buffer. - * - * Returns 0 for success or -1 for error - * - * @p: Params for the channel that we are using - * @used: number of pages used - * @errp: pointer to an error - */ -static int zlib_send_write(MultiFDSendParams *p, uint32_t used, Error **errp) -{ - struct zlib_data *z = p->data; - - return qio_channel_write_all(p->c, (void *)z->zbuff, p->next_packet_size, - errp); -} - -/** * zlib_recv_setup: setup receive side * * Create the compressed channel and buffer. @@ -190,7 +174,7 @@ static int zlib_recv_setup(MultiFDRecvParams *p, Error **errp) zs->avail_in = 0; zs->next_in = Z_NULL; if (inflateInit(zs) != Z_OK) { - error_setg(errp, "multifd %d: inflate init failed", p->id); + error_setg(errp, "multifd %u: inflate init failed", p->id); return -1; } /* To be safe, we reserve twice the size of the packet */ @@ -198,7 +182,7 @@ static int zlib_recv_setup(MultiFDRecvParams *p, Error **errp) z->zbuff = g_try_malloc(z->zbuff_len); if (!z->zbuff) { inflateEnd(zs); - error_setg(errp, "multifd %d: out of memory for zbuff", p->id); + error_setg(errp, "multifd %u: out of memory for zbuff", p->id); return -1; } return 0; @@ -241,13 +225,13 @@ static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp) uint32_t in_size = p->next_packet_size; /* we measure the change of total_out */ uint32_t out_size = zs->total_out; - uint32_t expected_size = p->pages->num * qemu_target_page_size(); + uint32_t expected_size = p->normal_num * page_size; uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; int ret; int i; if (flags != MULTIFD_FLAG_ZLIB) { - error_setg(errp, "multifd %d: flags received %x flags expected %x", + error_setg(errp, "multifd %u: flags received %x flags expected %x", p->id, flags, MULTIFD_FLAG_ZLIB); return -1; } @@ -260,16 +244,16 @@ static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp) zs->avail_in = in_size; zs->next_in = z->zbuff; - for (i = 0; i < p->pages->num; i++) { + for (i = 0; i < p->normal_num; i++) { int flush = Z_NO_FLUSH; unsigned long start = zs->total_out; - if (i == p->pages->num - 1) { + if (i == p->normal_num - 1) { flush = Z_SYNC_FLUSH; } zs->avail_out = page_size; - zs->next_out = p->pages->block->host + p->pages->offset[i]; + zs->next_out = p->host + p->normal[i]; /* * Welcome to inflate semantics @@ -284,19 +268,19 @@ static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp) } while (ret == Z_OK && zs->avail_in && (zs->total_out - start) < page_size); if (ret == Z_OK && (zs->total_out - start) < page_size) { - error_setg(errp, "multifd %d: inflate generated too few output", + error_setg(errp, "multifd %u: inflate generated too few output", p->id); return -1; } if (ret != Z_OK) { - error_setg(errp, "multifd %d: inflate returned %d instead of Z_OK", + error_setg(errp, "multifd %u: inflate returned %d instead of Z_OK", p->id, ret); return -1; } } out_size = zs->total_out - out_size; if (out_size != expected_size) { - error_setg(errp, "multifd %d: packet size received %d size expected %d", + error_setg(errp, "multifd %u: packet size received %u size expected %u", p->id, out_size, expected_size); return -1; } @@ -307,7 +291,6 @@ static MultiFDMethods multifd_zlib_ops = { .send_setup = zlib_send_setup, .send_cleanup = zlib_send_cleanup, .send_prepare = zlib_send_prepare, - .send_write = zlib_send_write, .recv_setup = zlib_recv_setup, .recv_cleanup = zlib_recv_cleanup, .recv_pages = zlib_recv_pages diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c index 2d5b611..d788d30 100644 --- a/migration/multifd-zstd.c +++ b/migration/multifd-zstd.c @@ -55,7 +55,7 @@ static int zstd_send_setup(MultiFDSendParams *p, Error **errp) z->zcs = ZSTD_createCStream(); if (!z->zcs) { g_free(z); - error_setg(errp, "multifd %d: zstd createCStream failed", p->id); + error_setg(errp, "multifd %u: zstd createCStream failed", p->id); return -1; } @@ -63,17 +63,17 @@ static int zstd_send_setup(MultiFDSendParams *p, Error **errp) if (ZSTD_isError(res)) { ZSTD_freeCStream(z->zcs); g_free(z); - error_setg(errp, "multifd %d: initCStream failed with error %s", + error_setg(errp, "multifd %u: initCStream failed with error %s", p->id, ZSTD_getErrorName(res)); return -1; } - /* To be safe, we reserve twice the size of the packet */ - z->zbuff_len = MULTIFD_PACKET_SIZE * 2; + /* This is the maxium size of the compressed buffer */ + z->zbuff_len = ZSTD_compressBound(MULTIFD_PACKET_SIZE); z->zbuff = g_try_malloc(z->zbuff_len); if (!z->zbuff) { ZSTD_freeCStream(z->zcs); g_free(z); - error_setg(errp, "multifd %d: out of memory for zbuff", p->id); + error_setg(errp, "multifd %u: out of memory for zbuff", p->id); return -1; } return 0; @@ -121,13 +121,13 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) z->out.size = z->zbuff_len; z->out.pos = 0; - for (i = 0; i < p->pages->num; i++) { + for (i = 0; i < p->normal_num; i++) { ZSTD_EndDirective flush = ZSTD_e_continue; - if (i == p->pages->num - 1) { + if (i == p->normal_num - 1) { flush = ZSTD_e_flush; } - z->in.src = p->pages->block->host + p->pages->offset[i]; + z->in.src = p->pages->block->host + p->normal[i]; z->in.size = page_size; z->in.pos = 0; @@ -144,16 +144,19 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) } while (ret > 0 && (z->in.size - z->in.pos > 0) && (z->out.size - z->out.pos > 0)); if (ret > 0 && (z->in.size - z->in.pos > 0)) { - error_setg(errp, "multifd %d: compressStream buffer too small", + error_setg(errp, "multifd %u: compressStream buffer too small", p->id); return -1; } if (ZSTD_isError(ret)) { - error_setg(errp, "multifd %d: compressStream error %s", + error_setg(errp, "multifd %u: compressStream error %s", p->id, ZSTD_getErrorName(ret)); return -1; } } + p->iov[p->iovs_num].iov_base = z->zbuff; + p->iov[p->iovs_num].iov_len = z->out.pos; + p->iovs_num++; p->next_packet_size = z->out.pos; p->flags |= MULTIFD_FLAG_ZSTD; @@ -161,25 +164,6 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) } /** - * zstd_send_write: do the actual write of the data - * - * Do the actual write of the comprresed buffer. - * - * Returns 0 for success or -1 for error - * - * @p: Params for the channel that we are using - * @used: number of pages used - * @errp: pointer to an error - */ -static int zstd_send_write(MultiFDSendParams *p, uint32_t used, Error **errp) -{ - struct zstd_data *z = p->data; - - return qio_channel_write_all(p->c, (void *)z->zbuff, p->next_packet_size, - errp); -} - -/** * zstd_recv_setup: setup receive side * * Create the compressed channel and buffer. @@ -198,7 +182,7 @@ static int zstd_recv_setup(MultiFDRecvParams *p, Error **errp) z->zds = ZSTD_createDStream(); if (!z->zds) { g_free(z); - error_setg(errp, "multifd %d: zstd createDStream failed", p->id); + error_setg(errp, "multifd %u: zstd createDStream failed", p->id); return -1; } @@ -206,7 +190,7 @@ static int zstd_recv_setup(MultiFDRecvParams *p, Error **errp) if (ZSTD_isError(ret)) { ZSTD_freeDStream(z->zds); g_free(z); - error_setg(errp, "multifd %d: initDStream failed with error %s", + error_setg(errp, "multifd %u: initDStream failed with error %s", p->id, ZSTD_getErrorName(ret)); return -1; } @@ -217,7 +201,7 @@ static int zstd_recv_setup(MultiFDRecvParams *p, Error **errp) if (!z->zbuff) { ZSTD_freeDStream(z->zds); g_free(z); - error_setg(errp, "multifd %d: out of memory for zbuff", p->id); + error_setg(errp, "multifd %u: out of memory for zbuff", p->id); return -1; } return 0; @@ -258,14 +242,14 @@ static int zstd_recv_pages(MultiFDRecvParams *p, Error **errp) uint32_t in_size = p->next_packet_size; uint32_t out_size = 0; size_t page_size = qemu_target_page_size(); - uint32_t expected_size = p->pages->num * page_size; + uint32_t expected_size = p->normal_num * page_size; uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; struct zstd_data *z = p->data; int ret; int i; if (flags != MULTIFD_FLAG_ZSTD) { - error_setg(errp, "multifd %d: flags received %x flags expected %x", + error_setg(errp, "multifd %u: flags received %x flags expected %x", p->id, flags, MULTIFD_FLAG_ZSTD); return -1; } @@ -279,8 +263,8 @@ static int zstd_recv_pages(MultiFDRecvParams *p, Error **errp) z->in.size = in_size; z->in.pos = 0; - for (i = 0; i < p->pages->num; i++) { - z->out.dst = p->pages->block->host + p->pages->offset[i]; + for (i = 0; i < p->normal_num; i++) { + z->out.dst = p->host + p->normal[i]; z->out.size = page_size; z->out.pos = 0; @@ -297,19 +281,19 @@ static int zstd_recv_pages(MultiFDRecvParams *p, Error **errp) } while (ret > 0 && (z->in.size - z->in.pos > 0) && (z->out.pos < page_size)); if (ret > 0 && (z->out.pos < page_size)) { - error_setg(errp, "multifd %d: decompressStream buffer too small", + error_setg(errp, "multifd %u: decompressStream buffer too small", p->id); return -1; } if (ZSTD_isError(ret)) { - error_setg(errp, "multifd %d: decompressStream returned %s", + error_setg(errp, "multifd %u: decompressStream returned %s", p->id, ZSTD_getErrorName(ret)); return ret; } out_size += z->out.pos; } if (out_size != expected_size) { - error_setg(errp, "multifd %d: packet size received %d size expected %d", + error_setg(errp, "multifd %u: packet size received %u size expected %u", p->id, out_size, expected_size); return -1; } @@ -320,7 +304,6 @@ static MultiFDMethods multifd_zstd_ops = { .send_setup = zstd_send_setup, .send_cleanup = zstd_send_cleanup, .send_prepare = zstd_send_prepare, - .send_write = zstd_send_write, .recv_setup = zstd_recv_setup, .recv_cleanup = zstd_recv_cleanup, .recv_pages = zstd_recv_pages diff --git a/migration/multifd.c b/migration/multifd.c index 3242f68..76b57a7 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -86,28 +86,21 @@ static void nocomp_send_cleanup(MultiFDSendParams *p, Error **errp) */ static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp) { - p->next_packet_size = p->pages->num * qemu_target_page_size(); + MultiFDPages_t *pages = p->pages; + size_t page_size = qemu_target_page_size(); + + for (int i = 0; i < p->normal_num; i++) { + p->iov[p->iovs_num].iov_base = pages->block->host + p->normal[i]; + p->iov[p->iovs_num].iov_len = page_size; + p->iovs_num++; + } + + p->next_packet_size = p->normal_num * page_size; p->flags |= MULTIFD_FLAG_NOCOMP; return 0; } /** - * nocomp_send_write: do the actual write of the data - * - * For no compression we just have to write the data. - * - * Returns 0 for success or -1 for error - * - * @p: Params for the channel that we are using - * @used: number of pages used - * @errp: pointer to an error - */ -static int nocomp_send_write(MultiFDSendParams *p, uint32_t used, Error **errp) -{ - return qio_channel_writev_all(p->c, p->pages->iov, used, errp); -} - -/** * nocomp_recv_setup: setup receive side * * For no compression this function does nothing. @@ -146,20 +139,24 @@ static void nocomp_recv_cleanup(MultiFDRecvParams *p) static int nocomp_recv_pages(MultiFDRecvParams *p, Error **errp) { uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; + size_t page_size = qemu_target_page_size(); if (flags != MULTIFD_FLAG_NOCOMP) { - error_setg(errp, "multifd %d: flags received %x flags expected %x", + error_setg(errp, "multifd %u: flags received %x flags expected %x", p->id, flags, MULTIFD_FLAG_NOCOMP); return -1; } - return qio_channel_readv_all(p->c, p->pages->iov, p->pages->num, errp); + for (int i = 0; i < p->normal_num; i++) { + p->iov[i].iov_base = p->host + p->normal[i]; + p->iov[i].iov_len = page_size; + } + return qio_channel_readv_all(p->c, p->iov, p->normal_num, errp); } static MultiFDMethods multifd_nocomp_ops = { .send_setup = nocomp_send_setup, .send_cleanup = nocomp_send_cleanup, .send_prepare = nocomp_send_prepare, - .send_write = nocomp_send_write, .recv_setup = nocomp_recv_setup, .recv_cleanup = nocomp_recv_cleanup, .recv_pages = nocomp_recv_pages @@ -212,8 +209,8 @@ static int multifd_recv_initial_packet(QIOChannel *c, Error **errp) } if (msg.version != MULTIFD_VERSION) { - error_setg(errp, "multifd: received packet version %d " - "expected %d", msg.version, MULTIFD_VERSION); + error_setg(errp, "multifd: received packet version %u " + "expected %u", msg.version, MULTIFD_VERSION); return -1; } @@ -229,8 +226,8 @@ static int multifd_recv_initial_packet(QIOChannel *c, Error **errp) } if (msg.id > migrate_multifd_channels()) { - error_setg(errp, "multifd: received channel version %d " - "expected %d", msg.version, MULTIFD_VERSION); + error_setg(errp, "multifd: received channel version %u " + "expected %u", msg.version, MULTIFD_VERSION); return -1; } @@ -242,7 +239,6 @@ static MultiFDPages_t *multifd_pages_init(size_t size) MultiFDPages_t *pages = g_new0(MultiFDPages_t, 1); pages->allocated = size; - pages->iov = g_new0(struct iovec, size); pages->offset = g_new0(ram_addr_t, size); return pages; @@ -254,8 +250,6 @@ static void multifd_pages_clear(MultiFDPages_t *pages) pages->allocated = 0; pages->packet_num = 0; pages->block = NULL; - g_free(pages->iov); - pages->iov = NULL; g_free(pages->offset); pages->offset = NULL; g_free(pages); @@ -268,7 +262,7 @@ static void multifd_send_fill_packet(MultiFDSendParams *p) packet->flags = cpu_to_be32(p->flags); packet->pages_alloc = cpu_to_be32(p->pages->allocated); - packet->pages_used = cpu_to_be32(p->pages->num); + packet->normal_pages = cpu_to_be32(p->normal_num); packet->next_packet_size = cpu_to_be32(p->next_packet_size); packet->packet_num = cpu_to_be64(p->packet_num); @@ -276,9 +270,9 @@ static void multifd_send_fill_packet(MultiFDSendParams *p) strncpy(packet->ramblock, p->pages->block->idstr, 256); } - for (i = 0; i < p->pages->num; i++) { + for (i = 0; i < p->normal_num; i++) { /* there are architectures where ram_addr_t is 32 bit */ - uint64_t temp = p->pages->offset[i]; + uint64_t temp = p->normal[i]; packet->offset[i] = cpu_to_be64(temp); } @@ -288,7 +282,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) { MultiFDPacket_t *packet = p->packet; size_t page_size = qemu_target_page_size(); - uint32_t pages_max = MULTIFD_PACKET_SIZE / page_size; + uint32_t page_count = MULTIFD_PACKET_SIZE / page_size; RAMBlock *block; int i; @@ -303,7 +297,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) packet->version = be32_to_cpu(packet->version); if (packet->version != MULTIFD_VERSION) { error_setg(errp, "multifd: received packet " - "version %d and expected version %d", + "version %u and expected version %u", packet->version, MULTIFD_VERSION); return -1; } @@ -315,33 +309,25 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) * If we received a packet that is 100 times bigger than expected * just stop migration. It is a magic number. */ - if (packet->pages_alloc > pages_max * 100) { + if (packet->pages_alloc > page_count) { error_setg(errp, "multifd: received packet " - "with size %d and expected a maximum size of %d", - packet->pages_alloc, pages_max * 100) ; + "with size %u and expected a size of %u", + packet->pages_alloc, page_count) ; return -1; } - /* - * We received a packet that is bigger than expected but inside - * reasonable limits (see previous comment). Just reallocate. - */ - if (packet->pages_alloc > p->pages->allocated) { - multifd_pages_clear(p->pages); - p->pages = multifd_pages_init(packet->pages_alloc); - } - p->pages->num = be32_to_cpu(packet->pages_used); - if (p->pages->num > packet->pages_alloc) { + p->normal_num = be32_to_cpu(packet->normal_pages); + if (p->normal_num > packet->pages_alloc) { error_setg(errp, "multifd: received packet " - "with %d pages and expected maximum pages are %d", - p->pages->num, packet->pages_alloc) ; + "with %u pages and expected maximum pages are %u", + p->normal_num, packet->pages_alloc) ; return -1; } p->next_packet_size = be32_to_cpu(packet->next_packet_size); p->packet_num = be64_to_cpu(packet->packet_num); - if (p->pages->num == 0) { + if (p->normal_num == 0) { return 0; } @@ -354,8 +340,8 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) return -1; } - p->pages->block = block; - for (i = 0; i < p->pages->num; i++) { + p->host = block->host; + for (i = 0; i < p->normal_num; i++) { uint64_t offset = be64_to_cpu(packet->offset[i]); if (offset > (block->used_length - page_size)) { @@ -364,9 +350,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) offset, block->used_length); return -1; } - p->pages->offset[i] = offset; - p->pages->iov[i].iov_base = block->host + offset; - p->pages->iov[i].iov_len = page_size; + p->normal[i] = offset; } return 0; @@ -470,8 +454,6 @@ int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset) if (pages->block == block) { pages->offset[pages->num] = offset; - pages->iov[pages->num].iov_base = block->host + offset; - pages->iov[pages->num].iov_len = qemu_target_page_size(); pages->num++; if (pages->num < pages->allocated) { @@ -567,6 +549,10 @@ void multifd_save_cleanup(void) p->packet_len = 0; g_free(p->packet); p->packet = NULL; + g_free(p->iov); + p->iov = NULL; + g_free(p->normal); + p->normal = NULL; multifd_send_state->ops->send_cleanup(p, &local_err); if (local_err) { migrate_set_error(migrate_get_current(), local_err); @@ -651,11 +637,17 @@ static void *multifd_send_thread(void *opaque) qemu_mutex_lock(&p->mutex); if (p->pending_job) { - uint32_t used = p->pages->num; uint64_t packet_num = p->packet_num; uint32_t flags = p->flags; + p->iovs_num = 1; + p->normal_num = 0; + + for (int i = 0; i < p->pages->num; i++) { + p->normal[p->normal_num] = p->pages->offset[i]; + p->normal_num++; + } - if (used) { + if (p->normal_num) { ret = multifd_send_state->ops->send_prepare(p, &local_err); if (ret != 0) { qemu_mutex_unlock(&p->mutex); @@ -665,27 +657,23 @@ static void *multifd_send_thread(void *opaque) multifd_send_fill_packet(p); p->flags = 0; p->num_packets++; - p->num_pages += used; + p->total_normal_pages += p->normal_num; p->pages->num = 0; p->pages->block = NULL; qemu_mutex_unlock(&p->mutex); - trace_multifd_send(p->id, packet_num, used, flags, + trace_multifd_send(p->id, packet_num, p->normal_num, flags, p->next_packet_size); - ret = qio_channel_write_all(p->c, (void *)p->packet, - p->packet_len, &local_err); + p->iov[0].iov_len = p->packet_len; + p->iov[0].iov_base = p->packet; + + ret = qio_channel_writev_all(p->c, p->iov, p->iovs_num, + &local_err); if (ret != 0) { break; } - if (used) { - ret = multifd_send_state->ops->send_write(p, used, &local_err); - if (ret != 0) { - break; - } - } - qemu_mutex_lock(&p->mutex); p->pending_job--; qemu_mutex_unlock(&p->mutex); @@ -724,7 +712,7 @@ out: qemu_mutex_unlock(&p->mutex); rcu_unregister_thread(); - trace_multifd_send_thread_end(p->id, p->num_packets, p->num_pages); + trace_multifd_send_thread_end(p->id, p->num_packets, p->total_normal_pages); return NULL; } @@ -922,6 +910,9 @@ int multifd_save_setup(Error **errp) p->packet->version = cpu_to_be32(MULTIFD_VERSION); p->name = g_strdup_printf("multifdsend_%d", i); p->tls_hostname = g_strdup(s->hostname); + /* We need one extra place for the packet header */ + p->iov = g_new0(struct iovec, page_count + 1); + p->normal = g_new0(ram_addr_t, page_count); socket_send_channel_create(multifd_new_send_channel_async, p); } @@ -1016,11 +1007,13 @@ int multifd_load_cleanup(Error **errp) qemu_sem_destroy(&p->sem_sync); g_free(p->name); p->name = NULL; - multifd_pages_clear(p->pages); - p->pages = NULL; p->packet_len = 0; g_free(p->packet); p->packet = NULL; + g_free(p->iov); + p->iov = NULL; + g_free(p->normal); + p->normal = NULL; multifd_recv_state->ops->recv_cleanup(p); } qemu_sem_destroy(&multifd_recv_state->sem_sync); @@ -1069,7 +1062,6 @@ static void *multifd_recv_thread(void *opaque) rcu_register_thread(); while (true) { - uint32_t used; uint32_t flags; if (p->quit) { @@ -1092,17 +1084,16 @@ static void *multifd_recv_thread(void *opaque) break; } - used = p->pages->num; flags = p->flags; /* recv methods don't know how to handle the SYNC flag */ p->flags &= ~MULTIFD_FLAG_SYNC; - trace_multifd_recv(p->id, p->packet_num, used, flags, + trace_multifd_recv(p->id, p->packet_num, p->normal_num, flags, p->next_packet_size); p->num_packets++; - p->num_pages += used; + p->total_normal_pages += p->normal_num; qemu_mutex_unlock(&p->mutex); - if (used) { + if (p->normal_num) { ret = multifd_recv_state->ops->recv_pages(p, &local_err); if (ret != 0) { break; @@ -1124,7 +1115,7 @@ static void *multifd_recv_thread(void *opaque) qemu_mutex_unlock(&p->mutex); rcu_unregister_thread(); - trace_multifd_recv_thread_end(p->id, p->num_packets, p->num_pages); + trace_multifd_recv_thread_end(p->id, p->num_packets, p->total_normal_pages); return NULL; } @@ -1156,11 +1147,12 @@ int multifd_load_setup(Error **errp) qemu_sem_init(&p->sem_sync, 0); p->quit = false; p->id = i; - p->pages = multifd_pages_init(page_count); p->packet_len = sizeof(MultiFDPacket_t) + sizeof(uint64_t) * page_count; p->packet = g_malloc0(p->packet_len); p->name = g_strdup_printf("multifdrecv_%d", i); + p->iov = g_new0(struct iovec, page_count); + p->normal = g_new0(ram_addr_t, page_count); } for (i = 0; i < thread_count; i++) { diff --git a/migration/multifd.h b/migration/multifd.h index e57adc7..4dda900 100644 --- a/migration/multifd.h +++ b/migration/multifd.h @@ -44,7 +44,8 @@ typedef struct { uint32_t flags; /* maximum number of allocated pages */ uint32_t pages_alloc; - uint32_t pages_used; + /* non zero pages */ + uint32_t normal_pages; /* size of the next packet that contains pages */ uint32_t next_packet_size; uint64_t packet_num; @@ -62,8 +63,6 @@ typedef struct { uint64_t packet_num; /* offset of each page */ ram_addr_t *offset; - /* pointer to each page */ - struct iovec *iov; RAMBlock *block; } MultiFDPages_t; @@ -106,10 +105,18 @@ typedef struct { /* thread local variables */ /* packets sent through this channel */ uint64_t num_packets; - /* pages sent through this channel */ - uint64_t num_pages; + /* non zero pages sent through this channel */ + uint64_t total_normal_pages; /* syncs main thread and channels */ QemuSemaphore sem_sync; + /* buffers to send */ + struct iovec *iov; + /* number of iovs used */ + uint32_t iovs_num; + /* Pages that are not zero */ + ram_addr_t *normal; + /* num of non zero pages */ + uint32_t normal_num; /* used for compression methods */ void *data; } MultiFDSendParams; @@ -130,8 +137,8 @@ typedef struct { bool running; /* should this thread finish */ bool quit; - /* array of pages to receive */ - MultiFDPages_t *pages; + /* ramblock host address */ + uint8_t *host; /* packet allocated len */ uint32_t packet_len; /* pointer to the packet */ @@ -145,10 +152,16 @@ typedef struct { uint32_t next_packet_size; /* packets sent through this channel */ uint64_t num_packets; - /* pages sent through this channel */ - uint64_t num_pages; + /* non zero pages recv through this channel */ + uint64_t total_normal_pages; /* syncs main thread and channels */ QemuSemaphore sem_sync; + /* buffers to recv */ + struct iovec *iov; + /* Pages that are not zero */ + ram_addr_t *normal; + /* num of non zero pages */ + uint32_t normal_num; /* used for de-compression methods */ void *data; } MultiFDRecvParams; @@ -160,8 +173,6 @@ typedef struct { void (*send_cleanup)(MultiFDSendParams *p, Error **errp); /* Prepare the send packet */ int (*send_prepare)(MultiFDSendParams *p, Error **errp); - /* Write the send packet */ - int (*send_write)(MultiFDSendParams *p, uint32_t used, Error **errp); /* Setup for receiving side */ int (*recv_setup)(MultiFDRecvParams *p, Error **errp); /* Cleanup for receiving side */ diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index d18b5d0..e662dd0 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -283,15 +283,13 @@ static bool ufd_check_and_apply(int ufd, MigrationIncomingState *mis) } #ifdef UFFD_FEATURE_THREAD_ID - if (migrate_postcopy_blocktime() && mis && - UFFD_FEATURE_THREAD_ID & supported_features) { - /* kernel supports that feature */ - /* don't create blocktime_context if it exists */ - if (!mis->blocktime_ctx) { - mis->blocktime_ctx = blocktime_context_new(); - } - + if (UFFD_FEATURE_THREAD_ID & supported_features) { asked_features |= UFFD_FEATURE_THREAD_ID; + if (migrate_postcopy_blocktime()) { + if (!mis->blocktime_ctx) { + mis->blocktime_ctx = blocktime_context_new(); + } + } } #endif @@ -525,6 +523,19 @@ int postcopy_ram_incoming_init(MigrationIncomingState *mis) return 0; } +static void postcopy_temp_pages_cleanup(MigrationIncomingState *mis) +{ + if (mis->postcopy_tmp_page) { + munmap(mis->postcopy_tmp_page, mis->largest_page_size); + mis->postcopy_tmp_page = NULL; + } + + if (mis->postcopy_tmp_zero_page) { + munmap(mis->postcopy_tmp_zero_page, mis->largest_page_size); + mis->postcopy_tmp_zero_page = NULL; + } +} + /* * At the end of a migration where postcopy_ram_incoming_init was called. */ @@ -566,14 +577,8 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis) } } - if (mis->postcopy_tmp_page) { - munmap(mis->postcopy_tmp_page, mis->largest_page_size); - mis->postcopy_tmp_page = NULL; - } - if (mis->postcopy_tmp_zero_page) { - munmap(mis->postcopy_tmp_zero_page, mis->largest_page_size); - mis->postcopy_tmp_zero_page = NULL; - } + postcopy_temp_pages_cleanup(mis); + trace_postcopy_ram_incoming_cleanup_blocktime( get_postcopy_total_blocktime()); @@ -1084,6 +1089,40 @@ retry: return NULL; } +static int postcopy_temp_pages_setup(MigrationIncomingState *mis) +{ + int err; + + mis->postcopy_tmp_page = mmap(NULL, mis->largest_page_size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mis->postcopy_tmp_page == MAP_FAILED) { + err = errno; + mis->postcopy_tmp_page = NULL; + error_report("%s: Failed to map postcopy_tmp_page %s", + __func__, strerror(err)); + return -err; + } + + /* + * Map large zero page when kernel can't use UFFDIO_ZEROPAGE for hugepages + */ + mis->postcopy_tmp_zero_page = mmap(NULL, mis->largest_page_size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (mis->postcopy_tmp_zero_page == MAP_FAILED) { + err = errno; + mis->postcopy_tmp_zero_page = NULL; + error_report("%s: Failed to map large zero page %s", + __func__, strerror(err)); + return -err; + } + + memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size); + + return 0; +} + int postcopy_ram_incoming_setup(MigrationIncomingState *mis) { /* Open the fd for the kernel to give us userfaults */ @@ -1124,32 +1163,11 @@ int postcopy_ram_incoming_setup(MigrationIncomingState *mis) return -1; } - mis->postcopy_tmp_page = mmap(NULL, mis->largest_page_size, - PROT_READ | PROT_WRITE, MAP_PRIVATE | - MAP_ANONYMOUS, -1, 0); - if (mis->postcopy_tmp_page == MAP_FAILED) { - mis->postcopy_tmp_page = NULL; - error_report("%s: Failed to map postcopy_tmp_page %s", - __func__, strerror(errno)); + if (postcopy_temp_pages_setup(mis)) { + /* Error dumped in the sub-function */ return -1; } - /* - * Map large zero page when kernel can't use UFFDIO_ZEROPAGE for hugepages - */ - mis->postcopy_tmp_zero_page = mmap(NULL, mis->largest_page_size, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, - -1, 0); - if (mis->postcopy_tmp_zero_page == MAP_FAILED) { - int e = errno; - mis->postcopy_tmp_zero_page = NULL; - error_report("%s: Failed to map large zero page %s", - __func__, strerror(e)); - return -e; - } - memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size); - trace_postcopy_ram_enable_notify(); return 0; diff --git a/migration/ram.c b/migration/ram.c index 57efa67..91ca743 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -325,7 +325,8 @@ struct RAMState { uint64_t xbzrle_bytes_prev; /* Start using XBZRLE (e.g., after the first round). */ bool xbzrle_enabled; - + /* Are we on the last stage of migration */ + bool last_stage; /* compression statistics since the beginning of the period */ /* amount of count that no free thread to compress data */ uint64_t compress_thread_busy_prev; @@ -354,6 +355,12 @@ static RAMState *ram_state; static NotifierWithReturnList precopy_notifier_list; +/* Whether postcopy has queued requests? */ +static bool postcopy_has_request(RAMState *rs) +{ + return !QSIMPLEQ_EMPTY_ATOMIC(&rs->src_page_requests); +} + void precopy_infrastructure_init(void) { notifier_with_return_list_init(&precopy_notifier_list); @@ -386,6 +393,18 @@ uint64_t ram_bytes_remaining(void) MigrationStats ram_counters; +static void ram_transferred_add(uint64_t bytes) +{ + if (runstate_is_running()) { + ram_counters.precopy_bytes += bytes; + } else if (migration_in_postcopy()) { + ram_counters.postcopy_bytes += bytes; + } else { + ram_counters.downtime_bytes += bytes; + } + ram_counters.transferred += bytes; +} + /* used by the search for pages to send */ struct PageSearchStatus { /* Current block being searched */ @@ -683,11 +702,10 @@ static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr) * @current_addr: addr of the page * @block: block that contains the page we want to send * @offset: offset inside the block for the page - * @last_stage: if we are at the completion stage */ static int save_xbzrle_page(RAMState *rs, uint8_t **current_data, ram_addr_t current_addr, RAMBlock *block, - ram_addr_t offset, bool last_stage) + ram_addr_t offset) { int encoded_len = 0, bytes_xbzrle; uint8_t *prev_cached_page; @@ -695,7 +713,7 @@ static int save_xbzrle_page(RAMState *rs, uint8_t **current_data, if (!cache_is_cached(XBZRLE.cache, current_addr, ram_counters.dirty_sync_count)) { xbzrle_counters.cache_miss++; - if (!last_stage) { + if (!rs->last_stage) { if (cache_insert(XBZRLE.cache, current_addr, *current_data, ram_counters.dirty_sync_count) == -1) { return -1; @@ -734,7 +752,7 @@ static int save_xbzrle_page(RAMState *rs, uint8_t **current_data, * Update the cache contents, so that it corresponds to the data * sent, in all cases except where we skip the page. */ - if (!last_stage && encoded_len != 0) { + if (!rs->last_stage && encoded_len != 0) { memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE); /* * In the case where we couldn't compress, ensure that the caller @@ -767,7 +785,7 @@ static int save_xbzrle_page(RAMState *rs, uint8_t **current_data, * RAM_SAVE_FLAG_CONTINUE. */ xbzrle_counters.bytes += bytes_xbzrle - 8; - ram_counters.transferred += bytes_xbzrle; + ram_transferred_add(bytes_xbzrle); return 1; } @@ -1158,6 +1176,15 @@ static void migration_bitmap_sync_precopy(RAMState *rs) } } +static void ram_release_page(const char *rbname, uint64_t offset) +{ + if (!migrate_release_ram() || !migration_in_postcopy()) { + return; + } + + ram_discard_range(rbname, offset, TARGET_PAGE_SIZE); +} + /** * save_zero_page_to_file: send the zero page to the file * @@ -1179,6 +1206,7 @@ static int save_zero_page_to_file(RAMState *rs, QEMUFile *file, len += save_page_header(rs, file, block, offset | RAM_SAVE_FLAG_ZERO); qemu_put_byte(file, 0); len += 1; + ram_release_page(block->idstr, offset); } return len; } @@ -1198,21 +1226,12 @@ static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset) if (len) { ram_counters.duplicate++; - ram_counters.transferred += len; + ram_transferred_add(len); return 1; } return -1; } -static void ram_release_pages(const char *rbname, uint64_t offset, int pages) -{ - if (!migrate_release_ram() || !migration_in_postcopy()) { - return; - } - - ram_discard_range(rbname, offset, ((ram_addr_t)pages) << TARGET_PAGE_BITS); -} - /* * @pages: the number of pages written by the control path, * < 0 - error @@ -1234,7 +1253,7 @@ static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, } if (bytes_xmit) { - ram_counters.transferred += bytes_xmit; + ram_transferred_add(bytes_xmit); *pages = 1; } @@ -1265,8 +1284,8 @@ static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, uint8_t *buf, bool async) { - ram_counters.transferred += save_page_header(rs, rs->f, block, - offset | RAM_SAVE_FLAG_PAGE); + ram_transferred_add(save_page_header(rs, rs->f, block, + offset | RAM_SAVE_FLAG_PAGE)); if (async) { qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE, migrate_release_ram() & @@ -1274,7 +1293,7 @@ static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, } else { qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE); } - ram_counters.transferred += TARGET_PAGE_SIZE; + ram_transferred_add(TARGET_PAGE_SIZE); ram_counters.normal++; return 1; } @@ -1290,9 +1309,8 @@ static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, * @rs: current RAM state * @block: block that contains the page we want to send * @offset: offset inside the block for the page - * @last_stage: if we are at the completion stage */ -static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage) +static int ram_save_page(RAMState *rs, PageSearchStatus *pss) { int pages = -1; uint8_t *p; @@ -1307,8 +1325,8 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage) XBZRLE_cache_lock(); if (rs->xbzrle_enabled && !migration_in_postcopy()) { pages = save_xbzrle_page(rs, &p, current_addr, block, - offset, last_stage); - if (!last_stage) { + offset); + if (!rs->last_stage) { /* Can't send this cached data async, since the cache page * might get updated before it gets to the wire */ @@ -1341,13 +1359,11 @@ static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block, ram_addr_t offset, uint8_t *source_buf) { RAMState *rs = ram_state; - uint8_t *p = block->host + (offset & TARGET_PAGE_MASK); - bool zero_page = false; + uint8_t *p = block->host + offset; int ret; if (save_zero_page_to_file(rs, f, block, offset)) { - zero_page = true; - goto exit; + return true; } save_page_header(rs, f, block, offset | RAM_SAVE_FLAG_COMPRESS_PAGE); @@ -1362,18 +1378,14 @@ static bool do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block, if (ret < 0) { qemu_file_set_error(migrate_get_current()->to_dst_file, ret); error_report("compressed data failed!"); - return false; } - -exit: - ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1); - return zero_page; + return false; } static void update_compress_thread_counts(const CompressParam *param, int bytes_xmit) { - ram_counters.transferred += bytes_xmit; + ram_transferred_add(bytes_xmit); if (param->zero_page) { ram_counters.duplicate++; @@ -1533,30 +1545,42 @@ static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again) */ static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset) { + struct RAMSrcPageRequest *entry; RAMBlock *block = NULL; + size_t page_size; - if (QSIMPLEQ_EMPTY_ATOMIC(&rs->src_page_requests)) { + if (!postcopy_has_request(rs)) { return NULL; } QEMU_LOCK_GUARD(&rs->src_page_req_mutex); - if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) { - struct RAMSrcPageRequest *entry = - QSIMPLEQ_FIRST(&rs->src_page_requests); - block = entry->rb; - *offset = entry->offset; - - if (entry->len > TARGET_PAGE_SIZE) { - entry->len -= TARGET_PAGE_SIZE; - entry->offset += TARGET_PAGE_SIZE; - } else { - memory_region_unref(block->mr); - QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req); - g_free(entry); - migration_consume_urgent_request(); - } + + /* + * This should _never_ change even after we take the lock, because no one + * should be taking anything off the request list other than us. + */ + assert(postcopy_has_request(rs)); + + entry = QSIMPLEQ_FIRST(&rs->src_page_requests); + block = entry->rb; + *offset = entry->offset; + page_size = qemu_ram_pagesize(block); + /* Each page request should only be multiple page size of the ramblock */ + assert((entry->len % page_size) == 0); + + if (entry->len > page_size) { + entry->len -= page_size; + entry->offset += page_size; + } else { + memory_region_unref(block->mr); + QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req); + g_free(entry); + migration_consume_urgent_request(); } + trace_unqueue_page(block->idstr, *offset, + test_bit((*offset >> TARGET_PAGE_BITS), block->bmap)); + return block; } @@ -1611,7 +1635,7 @@ static int ram_save_release_protection(RAMState *rs, PageSearchStatus *pss, /* Check if page is from UFFD-managed region. */ if (pss->block->flags & RAM_UF_WRITEPROTECT) { void *page_address = pss->block->host + (start_page << TARGET_PAGE_BITS); - uint64_t run_length = (pss->page - start_page + 1) << TARGET_PAGE_BITS; + uint64_t run_length = (pss->page - start_page) << TARGET_PAGE_BITS; /* Flush async buffers before un-protect. */ qemu_fflush(rs->f); @@ -1931,30 +1955,8 @@ static bool get_queued_page(RAMState *rs, PageSearchStatus *pss) { RAMBlock *block; ram_addr_t offset; - bool dirty; - - do { - block = unqueue_page(rs, &offset); - /* - * We're sending this page, and since it's postcopy nothing else - * will dirty it, and we must make sure it doesn't get sent again - * even if this queue request was received after the background - * search already sent it. - */ - if (block) { - unsigned long page; - page = offset >> TARGET_PAGE_BITS; - dirty = test_bit(page, block->bmap); - if (!dirty) { - trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset, - page); - } else { - trace_get_queued_page(block->idstr, (uint64_t)offset, page); - } - } - - } while (block && !dirty); + block = unqueue_page(rs, &offset); if (!block) { /* @@ -2129,10 +2131,8 @@ static bool save_compress_page(RAMState *rs, RAMBlock *block, ram_addr_t offset) * * @rs: current RAM state * @pss: data about the page we want to send - * @last_stage: if we are at the completion stage */ -static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss, - bool last_stage) +static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss) { RAMBlock *block = pss->block; ram_addr_t offset = ((ram_addr_t)pss->page) << TARGET_PAGE_BITS; @@ -2156,7 +2156,6 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss, xbzrle_cache_zero_page(rs, block->offset + offset); XBZRLE_cache_unlock(); } - ram_release_pages(block->idstr, offset, res); return res; } @@ -2171,7 +2170,7 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss, return ram_save_multifd_page(rs, block, offset); } - return ram_save_page(rs, pss, last_stage); + return ram_save_page(rs, pss); } /** @@ -2188,12 +2187,9 @@ static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss, * Returns the number of pages written or negative on error * * @rs: current RAM state - * @ms: current migration state * @pss: data about the page we want to send - * @last_stage: if we are at the completion stage */ -static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, - bool last_stage) +static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss) { int tmppages, pages = 0; size_t pagesize_bits = @@ -2211,7 +2207,7 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, do { /* Check the pages is dirty and if it is send it */ if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) { - tmppages = ram_save_target_page(rs, pss, last_stage); + tmppages = ram_save_target_page(rs, pss); if (tmppages < 0) { return tmppages; } @@ -2230,7 +2226,7 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, offset_in_ramblock(pss->block, ((ram_addr_t)pss->page) << TARGET_PAGE_BITS)); /* The offset we leave with is the min boundary of host page and block */ - pss->page = MIN(pss->page, hostpage_boundary) - 1; + pss->page = MIN(pss->page, hostpage_boundary); res = ram_save_release_protection(rs, pss, start_page); return (res < 0 ? res : pages); @@ -2245,13 +2241,11 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss, * or negative on error * * @rs: current RAM state - * @last_stage: if we are at the completion stage * * On systems where host-page-size > target-page-size it will send all the * pages in a host page that are dirty. */ - -static int ram_find_and_save_block(RAMState *rs, bool last_stage) +static int ram_find_and_save_block(RAMState *rs) { PageSearchStatus pss; int pages = 0; @@ -2280,7 +2274,7 @@ static int ram_find_and_save_block(RAMState *rs, bool last_stage) } if (found) { - pages = ram_save_host_page(rs, &pss, last_stage); + pages = ram_save_host_page(rs, &pss); } } while (!pages && again); @@ -2298,7 +2292,7 @@ void acct_update_position(QEMUFile *f, size_t size, bool zero) ram_counters.duplicate += pages; } else { ram_counters.normal += pages; - ram_counters.transferred += size; + ram_transferred_add(size); qemu_update_position(f, size); } } @@ -2408,40 +2402,6 @@ static void ram_state_reset(RAMState *rs) #define MAX_WAIT 50 /* ms, half buffered_file limit */ -/* - * 'expected' is the value you expect the bitmap mostly to be full - * of; it won't bother printing lines that are all this value. - * If 'todump' is null the migration bitmap is dumped. - */ -void ram_debug_dump_bitmap(unsigned long *todump, bool expected, - unsigned long pages) -{ - int64_t cur; - int64_t linelen = 128; - char linebuf[129]; - - for (cur = 0; cur < pages; cur += linelen) { - int64_t curb; - bool found = false; - /* - * Last line; catch the case where the line length - * is longer than remaining ram - */ - if (cur + linelen > pages) { - linelen = pages - cur; - } - for (curb = 0; curb < linelen; curb++) { - bool thisbit = test_bit(cur + curb, todump); - linebuf[curb] = thisbit ? '1' : '.'; - found = found || (thisbit != expected); - } - if (found) { - linebuf[curb] = '\0'; - fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf); - } - } -} - /* **** functions for postcopy ***** */ void ram_postcopy_migrated_memory_release(MigrationState *ms) @@ -2467,14 +2427,12 @@ void ram_postcopy_migrated_memory_release(MigrationState *ms) /** * postcopy_send_discard_bm_ram: discard a RAMBlock * - * Returns zero on success - * * Callback from postcopy_each_ram_send_discard for each RAMBlock * * @ms: current migration state * @block: RAMBlock to discard */ -static int postcopy_send_discard_bm_ram(MigrationState *ms, RAMBlock *block) +static void postcopy_send_discard_bm_ram(MigrationState *ms, RAMBlock *block) { unsigned long end = block->used_length >> TARGET_PAGE_BITS; unsigned long current; @@ -2498,15 +2456,13 @@ static int postcopy_send_discard_bm_ram(MigrationState *ms, RAMBlock *block) postcopy_discard_send_range(ms, one, discard_length); current = one + discard_length; } - - return 0; } +static void postcopy_chunk_hostpages_pass(MigrationState *ms, RAMBlock *block); + /** * postcopy_each_ram_send_discard: discard all RAMBlocks * - * Returns 0 for success or negative for error - * * Utility for the outgoing postcopy code. * Calls postcopy_send_discard_bm_ram for each RAMBlock * passing it bitmap indexes and name. @@ -2515,27 +2471,29 @@ static int postcopy_send_discard_bm_ram(MigrationState *ms, RAMBlock *block) * * @ms: current migration state */ -static int postcopy_each_ram_send_discard(MigrationState *ms) +static void postcopy_each_ram_send_discard(MigrationState *ms) { struct RAMBlock *block; - int ret; RAMBLOCK_FOREACH_NOT_IGNORED(block) { postcopy_discard_send_init(ms, block->idstr); /* + * Deal with TPS != HPS and huge pages. It discard any partially sent + * host-page size chunks, mark any partially dirty host-page size + * chunks as all dirty. In this case the host-page is the host-page + * for the particular RAMBlock, i.e. it might be a huge page. + */ + postcopy_chunk_hostpages_pass(ms, block); + + /* * Postcopy sends chunks of bitmap over the wire, but it * just needs indexes at this point, avoids it having * target page specific code. */ - ret = postcopy_send_discard_bm_ram(ms, block); + postcopy_send_discard_bm_ram(ms, block); postcopy_discard_send_finish(ms); - if (ret) { - return ret; - } } - - return 0; } /** @@ -2606,37 +2564,8 @@ static void postcopy_chunk_hostpages_pass(MigrationState *ms, RAMBlock *block) } /** - * postcopy_chunk_hostpages: discard any partially sent host page - * - * Utility for the outgoing postcopy code. - * - * Discard any partially sent host-page size chunks, mark any partially - * dirty host-page size chunks as all dirty. In this case the host-page - * is the host-page for the particular RAMBlock, i.e. it might be a huge page - * - * Returns zero on success - * - * @ms: current migration state - * @block: block we want to work with - */ -static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block) -{ - postcopy_discard_send_init(ms, block->idstr); - - /* - * Ensure that all partially dirty host pages are made fully dirty. - */ - postcopy_chunk_hostpages_pass(ms, block); - - postcopy_discard_send_finish(ms); - return 0; -} - -/** * ram_postcopy_send_discard_bitmap: transmit the discard bitmap * - * Returns zero on success - * * Transmit the set of pages to be discarded after precopy to the target * these are pages that: * a) Have been previously transmitted but are now dirty again @@ -2647,11 +2576,9 @@ static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block) * * @ms: current migration state */ -int ram_postcopy_send_discard_bitmap(MigrationState *ms) +void ram_postcopy_send_discard_bitmap(MigrationState *ms) { RAMState *rs = ram_state; - RAMBlock *block; - int ret; RCU_READ_LOCK_GUARD(); @@ -2663,21 +2590,9 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms) rs->last_sent_block = NULL; rs->last_page = 0; - RAMBLOCK_FOREACH_NOT_IGNORED(block) { - /* Deal with TPS != HPS and huge pages */ - ret = postcopy_chunk_hostpages(ms, block); - if (ret) { - return ret; - } + postcopy_each_ram_send_discard(ms); -#ifdef DEBUG_POSTCOPY - ram_debug_dump_bitmap(block->bmap, true, - block->used_length >> TARGET_PAGE_BITS); -#endif - } trace_ram_postcopy_send_discard_bitmap(); - - return postcopy_each_ram_send_discard(ms); } /** @@ -3073,14 +2988,14 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); i = 0; while ((ret = qemu_file_rate_limit(f)) == 0 || - !QSIMPLEQ_EMPTY(&rs->src_page_requests)) { + postcopy_has_request(rs)) { int pages; if (qemu_file_get_error(f)) { break; } - pages = ram_find_and_save_block(rs, false); + pages = ram_find_and_save_block(rs); /* no more pages to sent */ if (pages == 0) { done = 1; @@ -3133,7 +3048,7 @@ out: multifd_send_sync_main(rs->f); qemu_put_be64(f, RAM_SAVE_FLAG_EOS); qemu_fflush(f); - ram_counters.transferred += 8; + ram_transferred_add(8); ret = qemu_file_get_error(f); } @@ -3160,6 +3075,8 @@ static int ram_save_complete(QEMUFile *f, void *opaque) RAMState *rs = *temp; int ret = 0; + rs->last_stage = !migration_in_colo_state(); + WITH_RCU_READ_LOCK_GUARD() { if (!migration_in_postcopy()) { migration_bitmap_sync_precopy(rs); @@ -3173,7 +3090,7 @@ static int ram_save_complete(QEMUFile *f, void *opaque) while (true) { int pages; - pages = ram_find_and_save_block(rs, !migration_in_colo_state()); + pages = ram_find_and_save_block(rs); /* no more blocks to sent */ if (pages == 0) { break; diff --git a/migration/ram.h b/migration/ram.h index c515396..2c6dc36 100644 --- a/migration/ram.h +++ b/migration/ram.h @@ -55,11 +55,9 @@ void mig_throttle_counter_reset(void); uint64_t ram_pagesize_summary(void); int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len); void acct_update_position(QEMUFile *f, size_t size, bool zero); -void ram_debug_dump_bitmap(unsigned long *todump, bool expected, - unsigned long pages); void ram_postcopy_migrated_memory_release(MigrationState *ms); /* For outgoing discard bitmap */ -int ram_postcopy_send_discard_bitmap(MigrationState *ms); +void ram_postcopy_send_discard_bitmap(MigrationState *ms); /* For incoming postcopy discard */ int ram_discard_range(const char *block_name, uint64_t start, size_t length); int ram_postcopy_incoming_init(MigrationIncomingState *mis); diff --git a/migration/savevm.c b/migration/savevm.c index 0bef031..1599b02 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1298,8 +1298,9 @@ int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy) save_section_footer(f, se); if (ret < 0) { - error_report("failed to save SaveStateEntry with id(name): %d(%s)", - se->section_id, se->idstr); + error_report("failed to save SaveStateEntry with id(name): " + "%d(%s): %d", + se->section_id, se->idstr, ret); qemu_file_set_error(f, ret); } if (ret <= 0) { diff --git a/migration/trace-events b/migration/trace-events index b48d873..48aa7b1 100644 --- a/migration/trace-events +++ b/migration/trace-events @@ -86,8 +86,6 @@ put_qlist_end(const char *field_name, const char *vmsd_name) "%s(%s)" qemu_file_fclose(void) "" # ram.c -get_queued_page(const char *block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/0x%" PRIx64 " page_abs=0x%lx" -get_queued_page_not_dirty(const char *block_name, uint64_t tmp_offset, unsigned long page_abs) "%s/0x%" PRIx64 " page_abs=0x%lx" migration_bitmap_sync_start(void) "" migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64 migration_bitmap_clear_dirty(char *str, uint64_t start, uint64_t size, unsigned long page) "rb %s start 0x%"PRIx64" size 0x%"PRIx64" page 0x%lx" @@ -113,25 +111,26 @@ ram_save_iterate_big_wait(uint64_t milliconds, int iterations) "big wait: %" PRI ram_load_complete(int ret, uint64_t seq_iter) "exit_code %d seq iteration %" PRIu64 ram_write_tracking_ramblock_start(const char *block_id, size_t page_size, void *addr, size_t length) "%s: page_size: %zu addr: %p length: %zu" ram_write_tracking_ramblock_stop(const char *block_id, size_t page_size, void *addr, size_t length) "%s: page_size: %zu addr: %p length: %zu" +unqueue_page(char *block, uint64_t offset, bool dirty) "ramblock '%s' offset 0x%"PRIx64" dirty %d" # multifd.c -multifd_new_send_channel_async(uint8_t id) "channel %d" -multifd_recv(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags, uint32_t next_packet_size) "channel %d packet_num %" PRIu64 " pages %d flags 0x%x next packet size %d" -multifd_recv_new_channel(uint8_t id) "channel %d" +multifd_new_send_channel_async(uint8_t id) "channel %u" +multifd_recv(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " pages %u flags 0x%x next packet size %u" +multifd_recv_new_channel(uint8_t id) "channel %u" multifd_recv_sync_main(long packet_num) "packet num %ld" -multifd_recv_sync_main_signal(uint8_t id) "channel %d" -multifd_recv_sync_main_wait(uint8_t id) "channel %d" +multifd_recv_sync_main_signal(uint8_t id) "channel %u" +multifd_recv_sync_main_wait(uint8_t id) "channel %u" multifd_recv_terminate_threads(bool error) "error %d" -multifd_recv_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %d packets %" PRIu64 " pages %" PRIu64 -multifd_recv_thread_start(uint8_t id) "%d" -multifd_send(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags, uint32_t next_packet_size) "channel %d packet_num %" PRIu64 " pages %d flags 0x%x next packet size %d" -multifd_send_error(uint8_t id) "channel %d" +multifd_recv_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %u packets %" PRIu64 " pages %" PRIu64 +multifd_recv_thread_start(uint8_t id) "%u" +multifd_send(uint8_t id, uint64_t packet_num, uint32_t normal, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " normal pages %u flags 0x%x next packet size %u" +multifd_send_error(uint8_t id) "channel %u" multifd_send_sync_main(long packet_num) "packet num %ld" -multifd_send_sync_main_signal(uint8_t id) "channel %d" -multifd_send_sync_main_wait(uint8_t id) "channel %d" +multifd_send_sync_main_signal(uint8_t id) "channel %u" +multifd_send_sync_main_wait(uint8_t id) "channel %u" multifd_send_terminate_threads(bool error) "error %d" -multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %d packets %" PRIu64 " pages %" PRIu64 -multifd_send_thread_start(uint8_t id) "%d" +multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t normal_pages) "channel %u packets %" PRIu64 " normal pages %" PRIu64 +multifd_send_thread_start(uint8_t id) "%u" multifd_tls_outgoing_handshake_start(void *ioc, void *tioc, const char *hostname) "ioc=%p tioc=%p hostname=%s" multifd_tls_outgoing_handshake_error(void *ioc, const char *err) "ioc=%p err=%s" multifd_tls_outgoing_handshake_complete(void *ioc) "ioc=%p" diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c index 2669156..8c384dc 100644 --- a/monitor/hmp-cmds.c +++ b/monitor/hmp-cmds.c @@ -293,6 +293,18 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) monitor_printf(mon, "postcopy request count: %" PRIu64 "\n", info->ram->postcopy_requests); } + if (info->ram->precopy_bytes) { + monitor_printf(mon, "precopy ram: %" PRIu64 " kbytes\n", + info->ram->precopy_bytes >> 10); + } + if (info->ram->downtime_bytes) { + monitor_printf(mon, "downtime ram: %" PRIu64 " kbytes\n", + info->ram->downtime_bytes >> 10); + } + if (info->ram->postcopy_bytes) { + monitor_printf(mon, "postcopy ram: %" PRIu64 " kbytes\n", + info->ram->postcopy_bytes >> 10); + } } if (info->has_disk) { diff --git a/qapi/migration.json b/qapi/migration.json index bbfd48c..5975a0e 100644 --- a/qapi/migration.json +++ b/qapi/migration.json @@ -46,6 +46,15 @@ # @pages-per-second: the number of memory pages transferred per second # (Since 4.0) # +# @precopy-bytes: The number of bytes sent in the pre-copy phase +# (since 7.0). +# +# @downtime-bytes: The number of bytes sent while the guest is paused +# (since 7.0). +# +# @postcopy-bytes: The number of bytes sent during the post-copy phase +# (since 7.0). +# # Since: 0.14 ## { 'struct': 'MigrationStats', @@ -54,7 +63,9 @@ 'normal-bytes': 'int', 'dirty-pages-rate' : 'int', 'mbps' : 'number', 'dirty-sync-count' : 'int', 'postcopy-requests' : 'int', 'page-size' : 'int', - 'multifd-bytes' : 'uint64', 'pages-per-second' : 'uint64' } } + 'multifd-bytes' : 'uint64', 'pages-per-second' : 'uint64', + 'precopy-bytes' : 'uint64', 'downtime-bytes' : 'uint64', + 'postcopy-bytes' : 'uint64' } } ## # @XBZRLECacheStats: diff --git a/target/openrisc/machine.c b/target/openrisc/machine.c index 6239725..b7d7388 100644 --- a/target/openrisc/machine.c +++ b/target/openrisc/machine.c @@ -25,7 +25,6 @@ static const VMStateDescription vmstate_tlb_entry = { .name = "tlb_entry", .version_id = 1, .minimum_version_id = 1, - .minimum_version_id_old = 1, .fields = (VMStateField[]) { VMSTATE_UINTTL(mr, OpenRISCTLBEntry), VMSTATE_UINTTL(tr, OpenRISCTLBEntry), diff --git a/target/ppc/machine.c b/target/ppc/machine.c index 733a22d..a503e00 100644 --- a/target/ppc/machine.c +++ b/target/ppc/machine.c @@ -421,7 +421,6 @@ static const VMStateDescription vmstate_tm = { .name = "cpu/tm", .version_id = 1, .minimum_version_id = 1, - .minimum_version_id_old = 1, .needed = tm_needed, .fields = (VMStateField []) { VMSTATE_UINTTL_ARRAY(env.tm_gpr, PowerPCCPU, 32), @@ -672,7 +671,6 @@ const VMStateDescription vmstate_ppc_cpu = { .name = "cpu", .version_id = 5, .minimum_version_id = 5, - .minimum_version_id_old = 4, .pre_save = cpu_pre_save, .post_load = cpu_post_load, .fields = (VMStateField[]) { diff --git a/target/sparc/machine.c b/target/sparc/machine.c index 917375c..44b9e7d 100644 --- a/target/sparc/machine.c +++ b/target/sparc/machine.c @@ -10,7 +10,6 @@ static const VMStateDescription vmstate_cpu_timer = { .name = "cpu_timer", .version_id = 1, .minimum_version_id = 1, - .minimum_version_id_old = 1, .fields = (VMStateField[]) { VMSTATE_UINT32(frequency, CPUTimer), VMSTATE_UINT32(disabled, CPUTimer), @@ -30,7 +29,6 @@ static const VMStateDescription vmstate_trap_state = { .name = "trap_state", .version_id = 1, .minimum_version_id = 1, - .minimum_version_id_old = 1, .fields = (VMStateField[]) { VMSTATE_UINT64(tpc, trap_state), VMSTATE_UINT64(tnpc, trap_state), @@ -44,7 +42,6 @@ static const VMStateDescription vmstate_tlb_entry = { .name = "tlb_entry", .version_id = 1, .minimum_version_id = 1, - .minimum_version_id_old = 1, .fields = (VMStateField[]) { VMSTATE_UINT64(tag, SparcTLBEntry), VMSTATE_UINT64(tte, SparcTLBEntry), @@ -113,7 +110,6 @@ const VMStateDescription vmstate_sparc_cpu = { .name = "cpu", .version_id = SPARC_VMSTATE_VER, .minimum_version_id = SPARC_VMSTATE_VER, - .minimum_version_id_old = SPARC_VMSTATE_VER, .pre_save = cpu_pre_save, .fields = (VMStateField[]) { VMSTATE_UINTTL_ARRAY(env.gregs, SPARCCPU, 8), |