From 892ae715b6bc8107fccaa3caeb2a5bd4f6d2cb37 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Date: Wed, 27 Feb 2019 16:49:00 +0000
Subject: migration: Cleanup during exit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently we cleanup the migration object as we exit main after the
main_loop finishes; however if there's a migration running things
get messy and we can end up with the migration thread still trying
to access freed structures.

We now take a ref to the object around the migration thread itself,
so the act of dropping the ref during exit doesn't cause us to lose
the state until the thread quits.

Cancelling the migration during migration also tries to get the thread
to quit.

We do this a bit earlier; so hopefully migration gets out of the way
before all the devices etc are freed.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Tested-by: Alex Bennée <alex.bennee@linaro.org>
Message-Id: <20190227164900.16378-1-dgilbert@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
 include/migration/misc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/migration/misc.h b/include/migration/misc.h
index 0471e04..6f9df74 100644
--- a/include/migration/misc.h
+++ b/include/migration/misc.h
@@ -36,7 +36,7 @@ void dump_vmstate_json_to_file(FILE *out_fp);
 
 /* migration/migration.c */
 void migration_object_init(void);
-void migration_object_finalize(void);
+void migration_shutdown(void);
 void qemu_start_incoming_migration(const char *uri, Error **errp);
 bool migration_is_idle(void);
 void add_migration_state_change_notifier(Notifier *notify);
-- 
cgit v1.1


From 754cb9c0ebcf2164107baf39e75517e459887622 Mon Sep 17 00:00:00 2001
From: Yury Kotov <yury-kotov@yandex-team.ru>
Date: Fri, 15 Feb 2019 20:45:44 +0300
Subject: exec: Change RAMBlockIterFunc definition

Currently, qemu_ram_foreach_* calls RAMBlockIterFunc with many
block-specific arguments. But often iter func needs RAMBlock*.
This refactoring is needed for fast access to RAMBlock flags from
qemu_ram_foreach_block's callback. The only way to achieve this now
is to call qemu_ram_block_from_host (which also enumerates blocks).

So, this patch reduces complexity of
qemu_ram_foreach_block() -> cb() -> qemu_ram_block_from_host()
from O(n^2) to O(n).

Fix RAMBlockIterFunc definition and add some functions to read
RAMBlock* fields witch were passed.

Signed-off-by: Yury Kotov <yury-kotov@yandex-team.ru>
Message-Id: <20190215174548.2630-2-yury-kotov@yandex-team.ru>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
 include/exec/cpu-common.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 63ec1f9..9cd1f94 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -72,6 +72,9 @@ ram_addr_t qemu_ram_block_host_offset(RAMBlock *rb, void *host);
 void qemu_ram_set_idstr(RAMBlock *block, const char *name, DeviceState *dev);
 void qemu_ram_unset_idstr(RAMBlock *block);
 const char *qemu_ram_get_idstr(RAMBlock *rb);
+void *qemu_ram_get_host_addr(RAMBlock *rb);
+ram_addr_t qemu_ram_get_offset(RAMBlock *rb);
+ram_addr_t qemu_ram_get_used_length(RAMBlock *rb);
 bool qemu_ram_is_shared(RAMBlock *rb);
 bool qemu_ram_is_uf_zeroable(RAMBlock *rb);
 void qemu_ram_set_uf_zeroable(RAMBlock *rb);
@@ -116,8 +119,7 @@ void cpu_flush_icache_range(hwaddr start, hwaddr len);
 extern struct MemoryRegion io_mem_rom;
 extern struct MemoryRegion io_mem_notdirty;
 
-typedef int (RAMBlockIterFunc)(const char *block_name, void *host_addr,
-    ram_addr_t offset, ram_addr_t length, void *opaque);
+typedef int (RAMBlockIterFunc)(RAMBlock *rb, void *opaque);
 
 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque);
 int qemu_ram_foreach_migratable_block(RAMBlockIterFunc func, void *opaque);
-- 
cgit v1.1


From fbd162e629aaf8a7e464af44d2f73d06b26428ad Mon Sep 17 00:00:00 2001
From: Yury Kotov <yury-kotov@yandex-team.ru>
Date: Fri, 15 Feb 2019 20:45:46 +0300
Subject: migration: Add an ability to ignore shared RAM blocks

If ignore-shared capability is set then skip shared RAMBlocks during the
RAM migration.
Also, move qemu_ram_foreach_migratable_block (and rename) to the
migration code, because it requires access to the migration capabilities.

Signed-off-by: Yury Kotov <yury-kotov@yandex-team.ru>
Message-Id: <20190215174548.2630-4-yury-kotov@yandex-team.ru>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
 include/exec/cpu-common.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 9cd1f94..cef8b88 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -122,7 +122,6 @@ extern struct MemoryRegion io_mem_notdirty;
 typedef int (RAMBlockIterFunc)(RAMBlock *rb, void *opaque);
 
 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque);
-int qemu_ram_foreach_migratable_block(RAMBlockIterFunc func, void *opaque);
 int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length);
 
 #endif
-- 
cgit v1.1


From e7c91368d24637f077b457187f8dff4767dfadae Mon Sep 17 00:00:00 2001
From: Wei Wang <wei.w.wang@intel.com>
Date: Tue, 11 Dec 2018 16:24:47 +0800
Subject: bitmap: fix bitmap_count_one

BITMAP_LAST_WORD_MASK(nbits) returns 0xffffffff when "nbits=0", which
makes bitmap_count_one fail to handle the "nbits=0" case. It appears to be
preferred to remain BITMAP_LAST_WORD_MASK identical to the kernel
implementation that it is ported from.

So this patch fixes bitmap_count_one to handle the nbits=0 case.

Inital Discussion Link:
https://www.mail-archive.com/qemu-devel@nongnu.org/msg554316.html
Signed-off-by: Wei Wang <wei.w.wang@intel.com>
CC: Juan Quintela <quintela@redhat.com>
CC: Dr. David Alan Gilbert <dgilbert@redhat.com>
CC: Peter Xu <peterx@redhat.com>
Message-Id: <1544516693-5395-2-git-send-email-wei.w.wang@intel.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
 include/qemu/bitmap.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/qemu/bitmap.h b/include/qemu/bitmap.h
index 509eedd..679f1bd 100644
--- a/include/qemu/bitmap.h
+++ b/include/qemu/bitmap.h
@@ -221,6 +221,10 @@ static inline int bitmap_intersects(const unsigned long *src1,
 
 static inline long bitmap_count_one(const unsigned long *bitmap, long nbits)
 {
+    if (unlikely(!nbits)) {
+        return 0;
+    }
+
     if (small_nbits(nbits)) {
         return ctpopl(*bitmap & BITMAP_LAST_WORD_MASK(nbits));
     } else {
-- 
cgit v1.1


From 94960256ae0bc346de49e2ef3808778e8468caf5 Mon Sep 17 00:00:00 2001
From: Wei Wang <wei.w.wang@intel.com>
Date: Tue, 11 Dec 2018 16:24:48 +0800
Subject: bitmap: bitmap_count_one_with_offset

Count the number of 1s in a bitmap starting from an offset.

Signed-off-by: Wei Wang <wei.w.wang@intel.com>
CC: Dr. David Alan Gilbert <dgilbert@redhat.com>
CC: Juan Quintela <quintela@redhat.com>
CC: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Message-Id: <1544516693-5395-3-git-send-email-wei.w.wang@intel.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
 include/qemu/bitmap.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/qemu/bitmap.h b/include/qemu/bitmap.h
index 679f1bd..5c31334 100644
--- a/include/qemu/bitmap.h
+++ b/include/qemu/bitmap.h
@@ -232,6 +232,19 @@ static inline long bitmap_count_one(const unsigned long *bitmap, long nbits)
     }
 }
 
+static inline long bitmap_count_one_with_offset(const unsigned long *bitmap,
+                                                long offset, long nbits)
+{
+    long aligned_offset = QEMU_ALIGN_DOWN(offset, BITS_PER_LONG);
+    long redundant_bits = offset - aligned_offset;
+    long bits_to_count = nbits + redundant_bits;
+    const unsigned long *bitmap_start = bitmap +
+                                        aligned_offset / BITS_PER_LONG;
+
+    return bitmap_count_one(bitmap_start, bits_to_count) -
+           bitmap_count_one(bitmap_start, redundant_bits);
+}
+
 void bitmap_set(unsigned long *map, long i, long len);
 void bitmap_set_atomic(unsigned long *map, long i, long len);
 void bitmap_clear(unsigned long *map, long start, long nr);
-- 
cgit v1.1


From 6bcb05fc4211d2bd88fe73c65e93602428c51e5b Mon Sep 17 00:00:00 2001
From: Wei Wang <wei.w.wang@intel.com>
Date: Tue, 11 Dec 2018 16:24:50 +0800
Subject: migration: API to clear bits of guest free pages from the dirty
 bitmap

This patch adds an API to clear bits corresponding to guest free pages
from the dirty bitmap. Spilt the free page block if it crosses the QEMU
RAMBlock boundary.

Signed-off-by: Wei Wang <wei.w.wang@intel.com>
CC: Dr. David Alan Gilbert <dgilbert@redhat.com>
CC: Juan Quintela <quintela@redhat.com>
CC: Michael S. Tsirkin <mst@redhat.com>
CC: Peter Xu <peterx@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Message-Id: <1544516693-5395-5-git-send-email-wei.w.wang@intel.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
 include/migration/misc.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/migration/misc.h b/include/migration/misc.h
index 6f9df74..81ee347 100644
--- a/include/migration/misc.h
+++ b/include/migration/misc.h
@@ -14,12 +14,14 @@
 #ifndef MIGRATION_MISC_H
 #define MIGRATION_MISC_H
 
+#include "exec/cpu-common.h"
 #include "qemu/notify.h"
 #include "qapi/qapi-types-net.h"
 
 /* migration/ram.c */
 
 void ram_mig_init(void);
+void qemu_guest_free_page_hint(void *addr, size_t len);
 
 /* migration/block.c */
 
-- 
cgit v1.1


From bd2270608fa0112108aafcba89b87282c68db741 Mon Sep 17 00:00:00 2001
From: Wei Wang <wei.w.wang@intel.com>
Date: Tue, 11 Dec 2018 16:24:51 +0800
Subject: migration/ram.c: add a notifier chain for precopy

This patch adds a notifier chain for the memory precopy. This enables various
precopy optimizations to be invoked at specific places.

Signed-off-by: Wei Wang <wei.w.wang@intel.com>
CC: Dr. David Alan Gilbert <dgilbert@redhat.com>
CC: Juan Quintela <quintela@redhat.com>
CC: Michael S. Tsirkin <mst@redhat.com>
CC: Peter Xu <peterx@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Message-Id: <1544516693-5395-6-git-send-email-wei.w.wang@intel.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
 include/migration/misc.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'include')

diff --git a/include/migration/misc.h b/include/migration/misc.h
index 81ee347..dc46d33 100644
--- a/include/migration/misc.h
+++ b/include/migration/misc.h
@@ -20,6 +20,25 @@
 
 /* migration/ram.c */
 
+typedef enum PrecopyNotifyReason {
+    PRECOPY_NOTIFY_SETUP = 0,
+    PRECOPY_NOTIFY_BEFORE_BITMAP_SYNC = 1,
+    PRECOPY_NOTIFY_AFTER_BITMAP_SYNC = 2,
+    PRECOPY_NOTIFY_COMPLETE = 3,
+    PRECOPY_NOTIFY_CLEANUP = 4,
+    PRECOPY_NOTIFY_MAX = 5,
+} PrecopyNotifyReason;
+
+typedef struct PrecopyNotifyData {
+    enum PrecopyNotifyReason reason;
+    Error **errp;
+} PrecopyNotifyData;
+
+void precopy_infrastructure_init(void);
+void precopy_add_notifier(NotifierWithReturn *n);
+void precopy_remove_notifier(NotifierWithReturn *n);
+int precopy_notify(PrecopyNotifyReason reason, Error **errp);
+
 void ram_mig_init(void);
 void qemu_guest_free_page_hint(void *addr, size_t len);
 
-- 
cgit v1.1


From 6eeb63f740874150d7b5921541948c29b920a21d Mon Sep 17 00:00:00 2001
From: Wei Wang <wei.w.wang@intel.com>
Date: Tue, 11 Dec 2018 16:24:52 +0800
Subject: migration/ram.c: add the free page optimization enable flag

This patch adds the free page optimization enable flag, and a function
to set this flag. When the free page optimization is enabled, not all
the pages are needed to be sent in the bulk stage.

Why using a new flag, instead of directly disabling ram_bulk_stage when
the optimization is running?
Thanks for Peter Xu's reminder that disabling ram_bulk_stage will affect
the use of compression. Please see save_page_use_compression. When
xbzrle and compression are used, if free page optimizaion causes the
ram_bulk_stage to be disabled, save_page_use_compression will return
false, which disables the use of compression. That is, if free page
optimization avoids the sending of half of the guest pages, the other
half of pages loses the benefits of compression in the meantime. Using a
new flag to let migration_bitmap_find_dirty skip the free pages in the
bulk stage will avoid the above issue.

Signed-off-by: Wei Wang <wei.w.wang@intel.com>
CC: Dr. David Alan Gilbert <dgilbert@redhat.com>
CC: Juan Quintela <quintela@redhat.com>
CC: Michael S. Tsirkin <mst@redhat.com>
CC: Peter Xu <peterx@redhat.com>
Message-Id: <1544516693-5395-7-git-send-email-wei.w.wang@intel.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
 include/migration/misc.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/migration/misc.h b/include/migration/misc.h
index dc46d33..5cdbabd 100644
--- a/include/migration/misc.h
+++ b/include/migration/misc.h
@@ -38,6 +38,7 @@ void precopy_infrastructure_init(void);
 void precopy_add_notifier(NotifierWithReturn *n);
 void precopy_remove_notifier(NotifierWithReturn *n);
 int precopy_notify(PrecopyNotifyReason reason, Error **errp);
+void precopy_enable_free_page_optimization(void);
 
 void ram_mig_init(void);
 void qemu_guest_free_page_hint(void *addr, size_t len);
-- 
cgit v1.1


From c13c4153f76db23cac06a12044bf4dd346764059 Mon Sep 17 00:00:00 2001
From: Wei Wang <wei.w.wang@intel.com>
Date: Tue, 11 Dec 2018 16:24:53 +0800
Subject: virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT

The new feature enables the virtio-balloon device to receive hints of
guest free pages from the free page vq.

A notifier is registered to the migration precopy notifier chain. The
notifier calls free_page_start after the migration thread syncs the dirty
bitmap, so that the free page optimization starts to clear bits of free
pages from the bitmap. It calls the free_page_stop before the migration
thread syncs the bitmap, which is the end of the current round of ram
save. The free_page_stop is also called to stop the optimization in the
case when there is an error occurred in the process of ram saving.

Note: balloon will report pages which were free at the time of this call.
As the reporting happens asynchronously, dirty bit logging must be
enabled before this free_page_start call is made. Guest reporting must be
disabled before the migration dirty bitmap is synchronized.

Signed-off-by: Wei Wang <wei.w.wang@intel.com>
CC: Michael S. Tsirkin <mst@redhat.com>
CC: Dr. David Alan Gilbert <dgilbert@redhat.com>
CC: Juan Quintela <quintela@redhat.com>
CC: Peter Xu <peterx@redhat.com>
Message-Id: <1544516693-5395-8-git-send-email-wei.w.wang@intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
  dgilbert: Dropped kernel header update, fixed up CMD_ID_* name change
---
 include/hw/virtio/virtio-balloon.h | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/hw/virtio/virtio-balloon.h b/include/hw/virtio/virtio-balloon.h
index 99dcd6d..1afafb1 100644
--- a/include/hw/virtio/virtio-balloon.h
+++ b/include/hw/virtio/virtio-balloon.h
@@ -17,11 +17,14 @@
 
 #include "standard-headers/linux/virtio_balloon.h"
 #include "hw/virtio/virtio.h"
+#include "sysemu/iothread.h"
 
 #define TYPE_VIRTIO_BALLOON "virtio-balloon-device"
 #define VIRTIO_BALLOON(obj) \
         OBJECT_CHECK(VirtIOBalloon, (obj), TYPE_VIRTIO_BALLOON)
 
+#define VIRTIO_BALLOON_FREE_PAGE_REPORT_CMD_ID_MIN 0x80000000
+
 typedef struct virtio_balloon_stat VirtIOBalloonStat;
 
 typedef struct virtio_balloon_stat_modern {
@@ -32,15 +35,38 @@ typedef struct virtio_balloon_stat_modern {
 
 typedef struct PartiallyBalloonedPage PartiallyBalloonedPage;
 
+enum virtio_balloon_free_page_report_status {
+    FREE_PAGE_REPORT_S_STOP = 0,
+    FREE_PAGE_REPORT_S_REQUESTED = 1,
+    FREE_PAGE_REPORT_S_START = 2,
+    FREE_PAGE_REPORT_S_DONE = 3,
+};
+
 typedef struct VirtIOBalloon {
     VirtIODevice parent_obj;
-    VirtQueue *ivq, *dvq, *svq;
+    VirtQueue *ivq, *dvq, *svq, *free_page_vq;
+    uint32_t free_page_report_status;
     uint32_t num_pages;
     uint32_t actual;
+    uint32_t free_page_report_cmd_id;
     uint64_t stats[VIRTIO_BALLOON_S_NR];
     VirtQueueElement *stats_vq_elem;
     size_t stats_vq_offset;
     QEMUTimer *stats_timer;
+    IOThread *iothread;
+    QEMUBH *free_page_bh;
+    /*
+     * Lock to synchronize threads to access the free page reporting related
+     * fields (e.g. free_page_report_status).
+     */
+    QemuMutex free_page_lock;
+    QemuCond  free_page_cond;
+    /*
+     * Set to block iothread to continue reading free page hints as the VM is
+     * stopped.
+     */
+    bool block_iothread;
+    NotifierWithReturn free_page_report_notify;
     int64_t stats_last_update;
     int64_t stats_poll_interval;
     uint32_t host_features;
-- 
cgit v1.1