diff options
-rw-r--r-- | arch_init.c | 81 | ||||
-rw-r--r-- | cpus.c | 29 | ||||
-rw-r--r-- | include/migration/migration.h | 2 | ||||
-rw-r--r-- | include/qemu-common.h | 1 | ||||
-rw-r--r-- | include/qom/cpu.h | 10 | ||||
-rw-r--r-- | migration.c | 9 | ||||
-rw-r--r-- | qapi-schema.json | 2 | ||||
-rw-r--r-- | trace-events | 1 |
8 files changed, 134 insertions, 1 deletions
diff --git a/arch_init.c b/arch_init.c index 0e553c9..e9dd96f 100644 --- a/arch_init.c +++ b/arch_init.c @@ -104,6 +104,9 @@ int graphic_depth = 32; #endif const uint32_t arch_type = QEMU_ARCH; +static bool mig_throttle_on; +static int dirty_rate_high_cnt; +static void check_guest_throttling(void); /***********************************************************/ /* ram save/restore */ @@ -378,8 +381,14 @@ static void migration_bitmap_sync(void) uint64_t num_dirty_pages_init = migration_dirty_pages; MigrationState *s = migrate_get_current(); static int64_t start_time; + static int64_t bytes_xfer_prev; static int64_t num_dirty_pages_period; int64_t end_time; + int64_t bytes_xfer_now; + + if (!bytes_xfer_prev) { + bytes_xfer_prev = ram_bytes_transferred(); + } if (!start_time) { start_time = qemu_get_clock_ms(rt_clock); @@ -404,6 +413,25 @@ static void migration_bitmap_sync(void) /* more than 1 second = 1000 millisecons */ if (end_time > start_time + 1000) { + if (migrate_auto_converge()) { + /* The following detection logic can be refined later. For now: + Check to see if the dirtied bytes is 50% more than the approx. + amount of bytes that just got transferred since the last time we + were in this routine. 
If that happens >N times (for now N==4) + we turn on the throttle down logic */ + bytes_xfer_now = ram_bytes_transferred(); + if (s->dirty_pages_rate && + (num_dirty_pages_period * TARGET_PAGE_SIZE > + (bytes_xfer_now - bytes_xfer_prev)/2) && + (dirty_rate_high_cnt++ > 4)) { + trace_migration_throttle(); + mig_throttle_on = true; + dirty_rate_high_cnt = 0; + } + bytes_xfer_prev = bytes_xfer_now; + } else { + mig_throttle_on = false; + } s->dirty_pages_rate = num_dirty_pages_period * 1000 / (end_time - start_time); s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE; @@ -573,6 +601,8 @@ static int ram_save_setup(QEMUFile *f, void *opaque) migration_bitmap = bitmap_new(ram_pages); bitmap_set(migration_bitmap, 0, ram_pages); migration_dirty_pages = ram_pages; + mig_throttle_on = false; + dirty_rate_high_cnt = 0; if (migrate_use_xbzrle()) { XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() / @@ -635,6 +665,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) } total_sent += bytes_sent; acct_info.iterations++; + check_guest_throttling(); /* we want to check in the 1st loop, just in case it was the 1st time and we had to sync the dirty bitmap. qemu_get_clock_ns() is a bit expensive, so we only check each some @@ -1110,3 +1141,53 @@ TargetInfo *qmp_query_target(Error **errp) return info; } + +/* Stub function that gets run on the vcpu when it is brought out of the + VM to run inside qemu via async_run_on_cpu()*/ +static void mig_sleep_cpu(void *opq) +{ + qemu_mutex_unlock_iothread(); + g_usleep(30*1000); + qemu_mutex_lock_iothread(); +} + +/* To reduce the dirty rate explicitly disallow the VCPUs from spending + much time in the VM. The migration thread will try to catch up. + Workload will experience a performance drop. 
+*/ +static void mig_throttle_cpu_down(CPUState *cpu, void *data) +{ + async_run_on_cpu(cpu, mig_sleep_cpu, NULL); +} + +static void mig_throttle_guest_down(void) +{ + qemu_mutex_lock_iothread(); + qemu_for_each_cpu(mig_throttle_cpu_down, NULL); + qemu_mutex_unlock_iothread(); +} + +static void check_guest_throttling(void) +{ + static int64_t t0; + int64_t t1; + + if (!mig_throttle_on) { + return; + } + + if (!t0) { + t0 = qemu_get_clock_ns(rt_clock); + return; + } + + t1 = qemu_get_clock_ns(rt_clock); + + /* If it has been more than 40 ms since the last time the guest + * was throttled then do it again. + */ + if (40 < (t1-t0)/1000000) { + mig_throttle_guest_down(); + t0 = t1; + } +} @@ -652,6 +652,7 @@ void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data) wi.func = func; wi.data = data; + wi.free = false; if (cpu->queued_work_first == NULL) { cpu->queued_work_first = &wi; } else { @@ -670,6 +671,31 @@ void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data) } } +void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data) +{ + struct qemu_work_item *wi; + + if (qemu_cpu_is_self(cpu)) { + func(data); + return; + } + + wi = g_malloc0(sizeof(struct qemu_work_item)); + wi->func = func; + wi->data = data; + wi->free = true; + if (cpu->queued_work_first == NULL) { + cpu->queued_work_first = wi; + } else { + cpu->queued_work_last->next = wi; + } + cpu->queued_work_last = wi; + wi->next = NULL; + wi->done = false; + + qemu_cpu_kick(cpu); +} + static void flush_queued_work(CPUState *cpu) { struct qemu_work_item *wi; @@ -682,6 +708,9 @@ static void flush_queued_work(CPUState *cpu) cpu->queued_work_first = wi->next; wi->func(wi->data); wi->done = true; + if (wi->free) { + g_free(wi); + } } cpu->queued_work_last = NULL; qemu_cond_broadcast(&qemu_work_cond); diff --git a/include/migration/migration.h b/include/migration/migration.h index f0640e0..bc9fde0 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ 
-125,6 +125,8 @@ void migrate_del_blocker(Error *reason); bool migrate_rdma_pin_all(void); +bool migrate_auto_converge(void); + int xbzrle_encode_buffer(uint8_t *old_buf, uint8_t *new_buf, int slen, uint8_t *dst, int dlen); int xbzrle_decode_buffer(uint8_t *src, int slen, uint8_t *dst, int dlen); diff --git a/include/qemu-common.h b/include/qemu-common.h index f439738..6948bb9 100644 --- a/include/qemu-common.h +++ b/include/qemu-common.h @@ -293,6 +293,7 @@ struct qemu_work_item { void (*func)(void *data); void *data; int done; + bool free; }; diff --git a/include/qom/cpu.h b/include/qom/cpu.h index 147c256..dfd81a1 100644 --- a/include/qom/cpu.h +++ b/include/qom/cpu.h @@ -379,6 +379,16 @@ bool cpu_is_stopped(CPUState *cpu); void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data); /** + * async_run_on_cpu: + * @cpu: The vCPU to run on. + * @func: The function to be executed. + * @data: Data to pass to the function. + * + * Schedules the function @func for execution on the vCPU @cpu asynchronously. + */ +void async_run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data); + +/** * qemu_for_each_cpu: * @func: The function to be executed. * @data: Data to pass to the function. 
diff --git a/migration.c b/migration.c index 0681d8e..9f5a423 100644 --- a/migration.c +++ b/migration.c @@ -484,6 +484,15 @@ bool migrate_rdma_pin_all(void) return s->enabled_capabilities[MIGRATION_CAPABILITY_X_RDMA_PIN_ALL]; } +bool migrate_auto_converge(void) +{ + MigrationState *s; + + s = migrate_get_current(); + + return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; +} + int migrate_use_xbzrle(void) { MigrationState *s; diff --git a/qapi-schema.json b/qapi-schema.json index cf57783..7b9fef1 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -616,7 +616,7 @@ # Since: 1.2 ## { 'enum': 'MigrationCapability', - 'data': ['xbzrle', 'x-rdma-pin-all'] } + 'data': ['xbzrle', 'x-rdma-pin-all', 'auto-converge'] } ## # @MigrationCapabilityStatus diff --git a/trace-events b/trace-events index 0acce7b..7f6d962 100644 --- a/trace-events +++ b/trace-events @@ -1036,6 +1036,7 @@ savevm_section_end(unsigned int section_id) "section_id %u" # arch_init.c migration_bitmap_sync_start(void) "" migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64"" +migration_throttle(void) "" # hw/qxl.c disable qxl_interface_set_mm_time(int qid, uint32_t mm_time) "%d %d" |