add non-arbitrary migration stop condition

Currently, we're entering migration's stage 3 when a threshold of 10 pages remains to be transferred in the system. This has hurt some users. However, any proposed threshold is arbitrary by nature, and would only shift the annoyance. The proposal of this patch is to define a max_downtime variable, which represents the maximum downtime a migration user is willing to suffer. Then, based on the bandwidth of the last iteration, we calculate how much data we can transfer in such a window of time. Whenever we reach that value (or lower), we know it is safe to enter stage 3. This has largely improved the situation for me. On localhost migrations, where one would expect things to go as quickly as me running away from the duty of writing software for Windows, a kernel compile was enough to get the migration stuck. It takes 20 ~ 30 iterations now. Signed-off-by: Glauber Costa <glommer@redhat.com> Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
author: Glauber Costa <glommer@redhat.com> 2009-05-28 15:22:57 -0400
committer: Anthony Liguori <aliguori@us.ibm.com> 2009-06-16 15:45:40 -0500
commit: a0a3fd60f66bfdef38da835e7382b0bfbe05bafc (patch)
tree: 828a90711bce2e7e9e3a26ff6f899a316052158b /vl.c
parent: 8c14c17395809dfb2b20bd3598e067d2c5af7dc4 (diff)
download: qemu-a0a3fd60f66bfdef38da835e7382b0bfbe05bafc.zip
qemu-a0a3fd60f66bfdef38da835e7382b0bfbe05bafc.tar.gz
qemu-a0a3fd60f66bfdef38da835e7382b0bfbe05bafc.tar.bz2
1 files changed, 17 insertions, 2 deletions
diff --git a/vl.c b/vl.c
index 1301f9f..3242c23 100644
--- a/vl.c
+++ b/vl.c
@@ -3188,7 +3188,6 @@ static int ram_save_block(QEMUFile *f)
     return found;
 }
 
-static ram_addr_t ram_save_threshold = 10;
 static uint64_t bytes_transferred = 0;
 
 static ram_addr_t ram_save_remaining(void)
@@ -3222,6 +3221,9 @@ uint64_t ram_bytes_total(void)
 static int ram_save_live(QEMUFile *f, int stage, void *opaque)
 {
     ram_addr_t addr;
+    uint64_t bytes_transferred_last;
+    double bwidth = 0;
+    uint64_t expected_time = 0;
 
     if (cpu_physical_sync_dirty_bitmap(0, TARGET_PHYS_ADDR_MAX) != 0) {
         qemu_file_set_error(f);
@@ -3241,6 +3243,9 @@ static int ram_save_live(QEMUFile *f, int stage, void *opaque)
         qemu_put_be64(f, last_ram_offset | RAM_SAVE_FLAG_MEM_SIZE);
     }
 
+    bytes_transferred_last = bytes_transferred;
+    bwidth = get_clock();
+
     while (!qemu_file_rate_limit(f)) {
         int ret;
 
@@ -3250,6 +3255,14 @@ static int ram_save_live(QEMUFile *f, int stage, void *opaque)
             break;
     }
 
+    bwidth = get_clock() - bwidth;
+    bwidth = (bytes_transferred - bytes_transferred_last) / bwidth;
+
+    /* if we haven't transferred anything this round, force expected_time to
+     * a very high value, but without crashing */
+    if (bwidth == 0)
+        bwidth = 0.000001;
+
     /* try transferring iterative blocks of memory */
 
     if (stage == 3) {
@@ -3263,7 +3276,9 @@ static int ram_save_live(QEMUFile *f, int stage, void *opaque)
 
     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
 
-    return (stage == 2) && (ram_save_remaining() < ram_save_threshold);
+    expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
+
+    return (stage == 2) && (expected_time <= migrate_max_downtime());
 }
 
 static int ram_load_dead(QEMUFile *f, void *opaque)
author	Glauber Costa <glommer@redhat.com>	2009-05-28 15:22:57 -0400
committer	Anthony Liguori <aliguori@us.ibm.com>	2009-06-16 15:45:40 -0500
commit	a0a3fd60f66bfdef38da835e7382b0bfbe05bafc (patch)
tree	828a90711bce2e7e9e3a26ff6f899a316052158b /vl.c
parent	8c14c17395809dfb2b20bd3598e067d2c5af7dc4 (diff)
download	qemu-a0a3fd60f66bfdef38da835e7382b0bfbe05bafc.zip qemu-a0a3fd60f66bfdef38da835e7382b0bfbe05bafc.tar.gz qemu-a0a3fd60f66bfdef38da835e7382b0bfbe05bafc.tar.bz2