aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch_init.c81
-rw-r--r--cpus.c29
-rw-r--r--include/migration/migration.h2
-rw-r--r--include/qemu-common.h1
-rw-r--r--include/qom/cpu.h10
-rw-r--r--migration.c9
-rw-r--r--qapi-schema.json2
-rw-r--r--trace-events1
8 files changed, 134 insertions, 1 deletions
diff --git a/arch_init.c b/arch_init.c
index 0e553c9..e9dd96f 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -104,6 +104,9 @@ int graphic_depth = 32;
#endif
const uint32_t arch_type = QEMU_ARCH;
+static bool mig_throttle_on;
+static int dirty_rate_high_cnt;
+static void check_guest_throttling(void);
/***********************************************************/
/* ram save/restore */
@@ -378,8 +381,14 @@ static void migration_bitmap_sync(void)
uint64_t num_dirty_pages_init = migration_dirty_pages;
MigrationState *s = migrate_get_current();
static int64_t start_time;
+ static int64_t bytes_xfer_prev;
static int64_t num_dirty_pages_period;
int64_t end_time;
+ int64_t bytes_xfer_now;
+
+ if (!bytes_xfer_prev) {
+ bytes_xfer_prev = ram_bytes_transferred();
+ }
if (!start_time) {
start_time = qemu_get_clock_ms(rt_clock);
@@ -404,6 +413,25 @@ static void migration_bitmap_sync(void)
/* more than 1 second = 1000 millisecons */
if (end_time > start_time + 1000) {
+ if (migrate_auto_converge()) {
+ /* The following detection logic can be refined later. For now:
+ Check to see if the dirtied bytes exceed 50% of the approx.
+ amount of bytes that just got transferred since the last time we
+ were in this routine. If that happens >N times (for now N==4)
+ we turn on the throttle down logic */
+ bytes_xfer_now = ram_bytes_transferred();
+ if (s->dirty_pages_rate &&
+ (num_dirty_pages_period * TARGET_PAGE_SIZE >
+ (bytes_xfer_now - bytes_xfer_prev)/2) &&
+ (dirty_rate_high_cnt++ > 4)) {
+ trace_migration_throttle();
+ mig_throttle_on = true;
+ dirty_rate_high_cnt = 0;
+ }
+ bytes_xfer_prev = bytes_xfer_now;
+ } else {
+ mig_throttle_on = false;
+ }
s->dirty_pages_rate = num_dirty_pages_period * 1000
/ (end_time - start_time);
s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
@@ -573,6 +601,8 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
migration_bitmap = bitmap_new(ram_pages);
bitmap_set(migration_bitmap, 0, ram_pages);
migration_dirty_pages = ram_pages;
+ mig_throttle_on = false;
+ dirty_rate_high_cnt = 0;
if (migrate_use_xbzrle()) {
XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
@@ -635,6 +665,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
}
total_sent += bytes_sent;
acct_info.iterations++;
+ check_guest_throttling();
/* we want to check in the 1st loop, just in case it was the 1st time
and we had to sync the dirty bitmap.
qemu_get_clock_ns() is a bit expensive, so we only check each some
@@ -1110,3 +1141,53 @@ TargetInfo *qmp_query_target(Error **errp)
return info;
}
+
+/* Function that gets run on the vcpu, via async_run_on_cpu(), when it is
+ brought out of the VM to run inside qemu. @opq is unused. */
+static void mig_sleep_cpu(void *opq)
+{
+ qemu_mutex_unlock_iothread(); /* do not hold the lock while sleeping */
+ g_usleep(30*1000); /* microseconds, i.e. 30 ms */
+ qemu_mutex_lock_iothread(); /* re-take the lock before returning */
+}
+
+/* To reduce the dirty rate explicitly disallow the VCPUs from spending
+ much time in the VM. The migration thread will try to catch up.
+ Workload will experience a performance drop.
+*/
+static void mig_throttle_cpu_down(CPUState *cpu, void *data)
+{
+ async_run_on_cpu(cpu, mig_sleep_cpu, NULL); /* @data is unused */
+}
+
+static void mig_throttle_guest_down(void)
+{
+ qemu_mutex_lock_iothread(); /* held while the sleep work is queued */
+ qemu_for_each_cpu(mig_throttle_cpu_down, NULL); /* no per-call data */
+ qemu_mutex_unlock_iothread();
+}
+
+static void check_guest_throttling(void)
+{
+ static int64_t t0; /* rt_clock ns at first throttled call / last throttle */
+ int64_t t1;
+
+ if (!mig_throttle_on) { /* nothing to do until throttling is enabled */
+ return;
+ }
+
+ if (!t0) { /* first call with throttling on: only record the time */
+ t0 = qemu_get_clock_ns(rt_clock);
+ return;
+ }
+
+ t1 = qemu_get_clock_ns(rt_clock);
+
+ /* If it has been more than 40 ms since the last time the guest
+ * was throttled then do it again.
+ */
+ if (40 < (t1-t0)/1000000) { /* elapsed ns converted to ms */
+ mig_throttle_guest_down();
+ t0 = t1; /* restart the 40 ms interval */
+ }
+}
diff --git a/cpus.c b/cpus.c
index 29277e1..8062cdd 100644
--- a/cpus.c
+++ b/cpus.c
@@ -652,6 +652,7 @@ void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
wi.func = func;
wi.data = data;
+ wi.free = false;
if (cpu->queued_work_first == NULL) {
cpu->queued_work_first = &wi;
} else {
@@ -670,6 +671,31 @@ void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
}
}
+void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
+{
+ struct qemu_work_item *wi;
+
+ if (qemu_cpu_is_self(cpu)) { /* caller is @cpu itself: no queueing */
+ func(data); /* run synchronously */
+ return;
+ }
+
+ wi = g_malloc0(sizeof(struct qemu_work_item)); /* heap, not stack */
+ wi->func = func;
+ wi->data = data;
+ wi->free = true; /* flush_queued_work() g_free()s it after func runs */
+ if (cpu->queued_work_first == NULL) {
+ cpu->queued_work_first = wi;
+ } else {
+ cpu->queued_work_last->next = wi; /* append to the tail */
+ }
+ cpu->queued_work_last = wi;
+ wi->next = NULL;
+ wi->done = false;
+
+ qemu_cpu_kick(cpu); /* this function returns without waiting for func */
+}
+
static void flush_queued_work(CPUState *cpu)
{
struct qemu_work_item *wi;
@@ -682,6 +708,9 @@ static void flush_queued_work(CPUState *cpu)
cpu->queued_work_first = wi->next;
wi->func(wi->data);
wi->done = true;
+ if (wi->free) {
+ g_free(wi);
+ }
}
cpu->queued_work_last = NULL;
qemu_cond_broadcast(&qemu_work_cond);
diff --git a/include/migration/migration.h b/include/migration/migration.h
index f0640e0..bc9fde0 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -125,6 +125,8 @@ void migrate_del_blocker(Error *reason);
bool migrate_rdma_pin_all(void);
+bool migrate_auto_converge(void);
+
int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen,
uint8_t *dst, int dlen);
int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen);
diff --git a/include/qemu-common.h b/include/qemu-common.h
index f439738..6948bb9 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -293,6 +293,7 @@ struct qemu_work_item {
void (*func)(void *data);
void *data;
int done;
+ bool free;
};
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 147c256..dfd81a1 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -379,6 +379,16 @@ bool cpu_is_stopped(CPUState *cpu);
void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data);
/**
+ * async_run_on_cpu:
+ * @cpu: The vCPU to run on.
+ * @func: The function to be executed.
+ * @data: Data to pass to the function.
+ *
+ * Schedules the function @func for execution on the vCPU @cpu asynchronously.
+ */
+void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data);
+
+/**
* qemu_for_each_cpu:
* @func: The function to be executed.
* @data: Data to pass to the function.
diff --git a/migration.c b/migration.c
index 0681d8e..9f5a423 100644
--- a/migration.c
+++ b/migration.c
@@ -484,6 +484,15 @@ bool migrate_rdma_pin_all(void)
return s->enabled_capabilities[MIGRATION_CAPABILITY_X_RDMA_PIN_ALL];
}
+bool migrate_auto_converge(void)
+{
+ MigrationState *s;
+
+ s = migrate_get_current();
+
+ /* capability flag stored in the current migration state */
+ return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
+}
+
int migrate_use_xbzrle(void)
{
MigrationState *s;
diff --git a/qapi-schema.json b/qapi-schema.json
index cf57783..7b9fef1 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -616,7 +616,7 @@
# Since: 1.2
##
{ 'enum': 'MigrationCapability',
- 'data': ['xbzrle', 'x-rdma-pin-all'] }
+ 'data': ['xbzrle', 'x-rdma-pin-all', 'auto-converge'] }
##
# @MigrationCapabilityStatus
diff --git a/trace-events b/trace-events
index 0acce7b..7f6d962 100644
--- a/trace-events
+++ b/trace-events
@@ -1036,6 +1036,7 @@ savevm_section_end(unsigned int section_id) "section_id %u"
# arch_init.c
migration_bitmap_sync_start(void) ""
migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64""
+migration_throttle(void) ""
# hw/qxl.c
disable qxl_interface_set_mm_time(int qid, uint32_t mm_time) "%d %d"