author     zhanghailiang <zhang.zhanghailiang@huawei.com>  2020-02-24 14:54:10 +0800
committer  Dr. David Alan Gilbert <dgilbert@redhat.com>    2020-03-13 09:36:30 +0000
commit     0393031a16735835a441b6d6e0495a1bd14adb90 (patch)
tree       f1d07c287e6283f3b2dd6e06684746a7ee655db9 /migration
parent     dc14a470763c96fd9d360e1028ce38e8c3613a77 (diff)
COLO: Optimize memory back-up process
This patch reduces the downtime of the VM for the initial process. Previously, we copied all of this memory in the COLO preparation stage, during which the VM has to be stopped, which is a time-consuming process. Here we optimize it with a trick: back up every page during the migration process while COLO is enabled. Although this affects the speed of the migration, it clearly reduces the downtime of backing up all of the SVM's memory in the COLO preparation stage.

Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Message-Id: <20200224065414.36524-5-zhang.zhanghailiang@huawei.com>
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
  minor typo fixes
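The core of the change is in ram_load_precopy(): while COLO is enabled but the VM has not yet entered the COLO stage, every received page is written into the SVM's memory as before and, in addition, copied into the colo_cache, so the cache is already populated when COLO starts. The following is a condensed sketch of that pattern using the same helper names as the patch; load_one_page() itself is a hypothetical wrapper for illustration only, written as if it lived inside migration/ram.c next to the static helpers it calls, and it handles only the raw-page case with error handling trimmed.

/*
 * Hypothetical helper, illustration only: the per-page back-up logic
 * added to ram_load_precopy(), reduced to the RAM_SAVE_FLAG_PAGE case.
 */
static int load_one_page(QEMUFile *f, RAMBlock *block, ram_addr_t addr)
{
    void *host = host_from_ram_block_offset(block, addr);
    void *host_bak = NULL;

    if (migration_incoming_colo_enabled()) {
        if (migration_incoming_in_colo_state()) {
            /* Already in the COLO stage: load only into the cache. */
            host = colo_cache_from_block_offset(block, addr);
        } else {
            /*
             * Still in the migration stage: load into the SVM's memory
             * as usual, but remember the cache slot for the back-up.
             */
            host_bak = colo_cache_from_block_offset(block, addr);
        }
    }
    if (!host) {
        return -EINVAL;
    }

    /* Read the raw page into its destination. */
    qemu_get_buffer(f, host, TARGET_PAGE_SIZE);

    if (host_bak) {
        /* Back up the freshly loaded page into the COLO cache. */
        memcpy(host_bak, host, TARGET_PAGE_SIZE);
    }
    return 0;
}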
Diffstat (limited to 'migration')
-rw-r--r--  migration/colo.c   3
-rw-r--r--  migration/ram.c   66
-rw-r--r--  migration/ram.h    1
3 files changed, 53 insertions(+), 17 deletions(-)
diff --git a/migration/colo.c b/migration/colo.c
index 93c5a45..44942c4 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -26,6 +26,7 @@
#include "qemu/main-loop.h"
#include "qemu/rcu.h"
#include "migration/failover.h"
+#include "migration/ram.h"
#ifdef CONFIG_REPLICATION
#include "replication.h"
#endif
@@ -845,6 +846,8 @@ void *colo_process_incoming_thread(void *opaque)
*/
qemu_file_set_blocking(mis->from_src_file, true);
+ colo_incoming_start_dirty_log();
+
bioc = qio_channel_buffer_new(COLO_BUFFER_BASE_SIZE);
fb = qemu_fopen_channel_input(QIO_CHANNEL(bioc));
object_unref(OBJECT(bioc));
diff --git a/migration/ram.c b/migration/ram.c
index 02cfd76..31f4a9d 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2994,7 +2994,6 @@ int colo_init_ram_cache(void)
}
return -errno;
}
- memcpy(block->colo_cache, block->host, block->used_length);
}
}
@@ -3008,19 +3007,36 @@ int colo_init_ram_cache(void)
RAMBLOCK_FOREACH_NOT_IGNORED(block) {
unsigned long pages = block->max_length >> TARGET_PAGE_BITS;
-
block->bmap = bitmap_new(pages);
- bitmap_set(block->bmap, 0, pages);
}
}
- ram_state = g_new0(RAMState, 1);
- ram_state->migration_dirty_pages = 0;
- qemu_mutex_init(&ram_state->bitmap_mutex);
- memory_global_dirty_log_start();
+ ram_state_init(&ram_state);
return 0;
}
+/* TODO: duplicated with ram_init_bitmaps */
+void colo_incoming_start_dirty_log(void)
+{
+ RAMBlock *block = NULL;
+ /* For memory_global_dirty_log_start below. */
+ qemu_mutex_lock_iothread();
+ qemu_mutex_lock_ramlist();
+
+ memory_global_dirty_log_sync();
+ WITH_RCU_READ_LOCK_GUARD() {
+ RAMBLOCK_FOREACH_NOT_IGNORED(block) {
+ ramblock_sync_dirty_bitmap(ram_state, block);
+ /* Discard this dirty bitmap record */
+ bitmap_zero(block->bmap, block->max_length >> TARGET_PAGE_BITS);
+ }
+ memory_global_dirty_log_start();
+ }
+ ram_state->migration_dirty_pages = 0;
+ qemu_mutex_unlock_ramlist();
+ qemu_mutex_unlock_iothread();
+}
+
/* The global lock must be held to call this helper */
void colo_release_ram_cache(void)
{
@@ -3040,9 +3056,7 @@ void colo_release_ram_cache(void)
}
}
}
- qemu_mutex_destroy(&ram_state->bitmap_mutex);
- g_free(ram_state);
- ram_state = NULL;
+ ram_state_cleanup(&ram_state);
}
/**
@@ -3356,7 +3370,7 @@ static int ram_load_precopy(QEMUFile *f)
while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
ram_addr_t addr, total_ram_bytes;
- void *host = NULL;
+ void *host = NULL, *host_bak = NULL;
uint8_t ch;
/*
@@ -3387,20 +3401,35 @@ static int ram_load_precopy(QEMUFile *f)
RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
RAMBlock *block = ram_block_from_stream(f, flags);
+ host = host_from_ram_block_offset(block, addr);
/*
- * After going into COLO, we should load the Page into colo_cache.
+ * After going into the COLO stage, we should not load the page
+ * into the SVM's memory directly; we put it into colo_cache first.
+ * NOTE: We need to keep a copy of the SVM's ram in colo_cache.
+ * Previously, we copied all of this memory in the COLO preparation
+ * stage, during which the VM has to be stopped, a time-consuming
+ * process. Here we optimize it with a trick: back up every page
+ * during migration while COLO is enabled. Although this affects the
+ * speed of the migration, it clearly reduces the downtime of backing
+ * up all of the SVM's memory in the COLO preparation stage.
*/
- if (migration_incoming_in_colo_state()) {
- host = colo_cache_from_block_offset(block, addr);
- } else {
- host = host_from_ram_block_offset(block, addr);
+ if (migration_incoming_colo_enabled()) {
+ if (migration_incoming_in_colo_state()) {
+ /* In COLO stage, put all pages into cache temporarily */
+ host = colo_cache_from_block_offset(block, addr);
+ } else {
+ /*
+ * In the migration stage but before the COLO stage,
+ * put all pages into both the cache and the SVM's memory.
+ */
+ host_bak = colo_cache_from_block_offset(block, addr);
+ }
}
if (!host) {
error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
ret = -EINVAL;
break;
}
-
if (!migration_incoming_in_colo_state()) {
ramblock_recv_bitmap_set(block, host);
}
@@ -3514,6 +3543,9 @@ static int ram_load_precopy(QEMUFile *f)
if (!ret) {
ret = qemu_file_get_error(f);
}
+ if (!ret && host_bak) {
+ memcpy(host_bak, host, TARGET_PAGE_SIZE);
+ }
}
ret |= wait_for_decompress_done();
diff --git a/migration/ram.h b/migration/ram.h
index a553d40..5ceaff7 100644
--- a/migration/ram.h
+++ b/migration/ram.h
@@ -66,5 +66,6 @@ int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *rb);
/* ram cache */
int colo_init_ram_cache(void);
void colo_release_ram_cache(void);
+void colo_incoming_start_dirty_log(void);
#endif
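For orientation, the sketch below shows the rough order in which the three helpers declared in migration/ram.h are used on the incoming (Secondary VM) side, as implied by this patch. colo_ram_cache_lifecycle_sketch() is a hypothetical function for illustration only; in the tree, colo_incoming_start_dirty_log() is called from colo_process_incoming_thread(), as the colo.c hunk above shows.

#include "qemu/osdep.h"
#include "migration/ram.h"

/* Hypothetical illustration of the call order; error paths omitted. */
static void colo_ram_cache_lifecycle_sketch(void)
{
    /* When COLO is enabled on the incoming side: allocate colo_cache and bitmaps. */
    if (colo_init_ram_cache() < 0) {
        return;
    }

    /*
     * ... precopy migration runs; with this patch, ram_load_precopy()
     * backs up every received page into colo_cache as it arrives ...
     */

    /* On entering the COLO stage: reset the bitmaps and start dirty logging. */
    colo_incoming_start_dirty_log();

    /* On failover or shutdown: free colo_cache and the RAMState. */
    colo_release_ram_cache();
}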