diff options
-rw-r--r-- | block.c | 11 | ||||
-rw-r--r-- | block/mirror.c | 4 | ||||
-rw-r--r-- | block/raw-posix.c | 2 | ||||
-rw-r--r-- | block/raw-win32.c | 4 | ||||
-rw-r--r-- | cpus.c | 78 | ||||
-rw-r--r-- | disas/cris.c | 2 | ||||
-rw-r--r-- | hmp.c | 21 | ||||
-rw-r--r-- | include/migration/qemu-file.h | 18 | ||||
-rw-r--r-- | include/migration/vmstate.h | 3 | ||||
-rw-r--r-- | include/qom/cpu.h | 42 | ||||
-rw-r--r-- | migration/migration.c | 134 | ||||
-rw-r--r-- | migration/qemu-file-buf.c | 9 | ||||
-rw-r--r-- | migration/qemu-file-stdio.c | 15 | ||||
-rw-r--r-- | migration/qemu-file-unix.c | 10 | ||||
-rw-r--r-- | migration/qemu-file.c | 24 | ||||
-rw-r--r-- | migration/ram.c | 232 | ||||
-rw-r--r-- | migration/rdma.c | 30 | ||||
-rw-r--r-- | migration/savevm.c | 19 | ||||
-rw-r--r-- | qapi-schema.json | 40 | ||||
-rw-r--r-- | qtest.c | 1 | ||||
-rw-r--r-- | target-microblaze/cpu.c | 4 | ||||
-rw-r--r-- | tests/qemu-iotests/049.out | 1 | ||||
-rwxr-xr-x | tests/qemu-iotests/128 | 9 | ||||
-rw-r--r-- | trace-events | 4 | ||||
-rw-r--r-- | ui/cocoa.m | 119 | ||||
-rw-r--r-- | vl.c | 1 |
26 files changed, 563 insertions, 274 deletions
@@ -1907,6 +1907,12 @@ void bdrv_close(BlockDriverState *bs) if (bs->job) { block_job_cancel_sync(bs->job); } + + /* Disable I/O limits and drain all pending throttled requests */ + if (bs->io_limits_enabled) { + bdrv_io_limits_disable(bs); + } + bdrv_drain(bs); /* complete I/O */ bdrv_flush(bs); bdrv_drain(bs); /* in case flush left pending I/O */ @@ -1958,11 +1964,6 @@ void bdrv_close(BlockDriverState *bs) blk_dev_change_media_cb(bs->blk, false); } - /*throttling disk I/O limits*/ - if (bs->io_limits_enabled) { - bdrv_io_limits_disable(bs); - } - QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) { g_free(ban); } diff --git a/block/mirror.c b/block/mirror.c index a258926..87928ab 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -455,6 +455,8 @@ static void coroutine_fn mirror_run(void *opaque) if (!s->is_none_mode) { /* First part, loop on the sectors and initialize the dirty bitmap. */ BlockDriverState *base = s->base; + bool mark_all_dirty = s->base == NULL && !bdrv_has_zero_init(s->target); + for (sector_num = 0; sector_num < end; ) { /* Just to make sure we are not exceeding int limit. */ int nb_sectors = MIN(INT_MAX >> BDRV_SECTOR_BITS, @@ -477,7 +479,7 @@ static void coroutine_fn mirror_run(void *opaque) } assert(n > 0); - if (ret == 1) { + if (ret == 1 || mark_all_dirty) { bdrv_set_dirty_bitmap(s->dirty_bitmap, sector_num, n); } sector_num += n; diff --git a/block/raw-posix.c b/block/raw-posix.c index 30df8ad..86f8562 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -1648,7 +1648,7 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp) goto out; } - fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, + fd = qemu_open(filename, O_RDWR | O_CREAT | O_TRUNC | O_BINARY, 0644); if (fd < 0) { result = -errno; diff --git a/block/raw-win32.c b/block/raw-win32.c index 68f2338..b562c94 100644 --- a/block/raw-win32.c +++ b/block/raw-win32.c @@ -119,9 +119,9 @@ static int aio_worker(void *arg) case QEMU_AIO_WRITE: count = handle_aiocb_rw(aiocb); if (count == aiocb->aio_nbytes) { - count = 0; + ret = 0; } else { - count = -EINVAL; + ret = -EINVAL; } break; case QEMU_AIO_FLUSH: @@ -69,6 +69,14 @@ static CPUState *next_cpu; int64_t max_delay; int64_t max_advance; +/* vcpu throttling controls */ +static QEMUTimer *throttle_timer; +static unsigned int throttle_percentage; + +#define CPU_THROTTLE_PCT_MIN 1 +#define CPU_THROTTLE_PCT_MAX 99 +#define CPU_THROTTLE_TIMESLICE_NS 10000000 + bool cpu_is_stopped(CPUState *cpu) { return cpu->stopped || !runstate_is_running(); @@ -505,10 +513,80 @@ static const VMStateDescription vmstate_timers = { } }; +static void cpu_throttle_thread(void *opaque) +{ + CPUState *cpu = opaque; + double pct; + double throttle_ratio; + long sleeptime_ns; + + if (!cpu_throttle_get_percentage()) { + return; + } + + pct = (double)cpu_throttle_get_percentage()/100; + throttle_ratio = pct / (1 - pct); + sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS); + + qemu_mutex_unlock_iothread(); + atomic_set(&cpu->throttle_thread_scheduled, 0); + g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */ + qemu_mutex_lock_iothread(); +} + +static void cpu_throttle_timer_tick(void *opaque) +{ + CPUState *cpu; + double pct; + + /* Stop the timer if needed */ + if (!cpu_throttle_get_percentage()) { + return; + } + CPU_FOREACH(cpu) { + if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) { + async_run_on_cpu(cpu, cpu_throttle_thread, cpu); + } + } + + pct = (double)cpu_throttle_get_percentage()/100; + timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) + + CPU_THROTTLE_TIMESLICE_NS / (1-pct)); +} + +void cpu_throttle_set(int new_throttle_pct) +{ + /* Ensure throttle percentage is within valid range */ + new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX); + new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN); + + atomic_set(&throttle_percentage, new_throttle_pct); + + timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) + + CPU_THROTTLE_TIMESLICE_NS); +} + +void cpu_throttle_stop(void) +{ + atomic_set(&throttle_percentage, 0); +} + +bool cpu_throttle_active(void) +{ + return (cpu_throttle_get_percentage() != 0); +} + +int cpu_throttle_get_percentage(void) +{ + return atomic_read(&throttle_percentage); +} + void cpu_ticks_init(void) { seqlock_init(&timers_state.vm_clock_seqlock, NULL); vmstate_register(NULL, 0, &vmstate_timers, &timers_state); + throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT, + cpu_throttle_timer_tick, NULL); } void configure_icount(QemuOpts *opts, Error **errp) diff --git a/disas/cris.c b/disas/cris.c index 1b76a09..4482a41 100644 --- a/disas/cris.c +++ b/disas/cris.c @@ -2492,7 +2492,7 @@ print_with_operands (const struct cris_opcode *opcodep, = spec_reg_info ((insn >> 12) & 15, disdata->distype); if (sregp->name == NULL) - /* Should have been caught as a non-match eariler. */ + /* Should have been caught as a non-match earlier. */ *tp++ = '?'; else { @@ -232,6 +232,11 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) info->xbzrle_cache->overflow); } + if (info->has_x_cpu_throttle_percentage) { + monitor_printf(mon, "cpu throttle percentage: %" PRIu64 "\n", + info->x_cpu_throttle_percentage); + } + qapi_free_MigrationInfo(info); qapi_free_MigrationCapabilityStatusList(caps); } @@ -272,6 +277,12 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict) monitor_printf(mon, " %s: %" PRId64, MigrationParameter_lookup[MIGRATION_PARAMETER_DECOMPRESS_THREADS], params->decompress_threads); + monitor_printf(mon, " %s: %" PRId64, + MigrationParameter_lookup[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL], + params->x_cpu_throttle_initial); + monitor_printf(mon, " %s: %" PRId64, + MigrationParameter_lookup[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT], + params->x_cpu_throttle_increment); monitor_printf(mon, "\n"); } @@ -1221,6 +1232,8 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) bool has_compress_level = false; bool has_compress_threads = false; bool has_decompress_threads = false; + bool has_x_cpu_throttle_initial = false; + bool has_x_cpu_throttle_increment = false; int i; for (i = 0; i < MIGRATION_PARAMETER_MAX; i++) { @@ -1235,10 +1248,18 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) case MIGRATION_PARAMETER_DECOMPRESS_THREADS: has_decompress_threads = true; break; + case MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL: + has_x_cpu_throttle_initial = true; + break; + case MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT: + has_x_cpu_throttle_increment = true; + break; } qmp_migrate_set_parameters(has_compress_level, value, has_compress_threads, value, has_decompress_threads, value, + has_x_cpu_throttle_initial, value, + has_x_cpu_throttle_increment, value, &err); break; } diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h index ea49f33..29a338d 100644 --- a/include/migration/qemu-file.h +++ b/include/migration/qemu-file.h @@ -31,15 +31,15 @@ * The pos argument can be ignored if the file is only being used for * streaming. The handler should try to write all of the data it can. */ -typedef int (QEMUFilePutBufferFunc)(void *opaque, const uint8_t *buf, - int64_t pos, int size); +typedef ssize_t (QEMUFilePutBufferFunc)(void *opaque, const uint8_t *buf, + int64_t pos, size_t size); /* Read a chunk of data from a file at the given position. The pos argument * can be ignored if the file is only be used for streaming. The number of * bytes actually read should be returned. */ -typedef int (QEMUFileGetBufferFunc)(void *opaque, uint8_t *buf, - int64_t pos, int size); +typedef ssize_t (QEMUFileGetBufferFunc)(void *opaque, uint8_t *buf, + int64_t pos, size_t size); /* Close a file * @@ -126,13 +126,13 @@ int qemu_get_fd(QEMUFile *f); int qemu_fclose(QEMUFile *f); int64_t qemu_ftell(QEMUFile *f); int64_t qemu_ftell_fast(QEMUFile *f); -void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size); +void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size); void qemu_put_byte(QEMUFile *f, int v); /* * put_buffer without copying the buffer. * The buffer should be available till it is sent asynchronously. */ -void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, int size); +void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size); bool qemu_file_mode_is_not_valid(const char *mode); bool qemu_file_is_writable(QEMUFile *f); @@ -161,8 +161,8 @@ static inline void qemu_put_ubyte(QEMUFile *f, unsigned int v) void qemu_put_be16(QEMUFile *f, unsigned int v); void qemu_put_be32(QEMUFile *f, unsigned int v); void qemu_put_be64(QEMUFile *f, uint64_t v); -int qemu_peek_buffer(QEMUFile *f, uint8_t **buf, int size, size_t offset); -int qemu_get_buffer(QEMUFile *f, uint8_t *buf, int size); +size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset); +size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size); ssize_t qemu_put_compression_data(QEMUFile *f, const uint8_t *p, size_t size, int level); int qemu_put_qemu_file(QEMUFile *f_des, QEMUFile *f_src); @@ -237,7 +237,7 @@ static inline void qemu_get_8s(QEMUFile *f, uint8_t *pv) } // Signed versions for type safety -static inline void qemu_put_sbuffer(QEMUFile *f, const int8_t *buf, int size) +static inline void qemu_put_sbuffer(QEMUFile *f, const int8_t *buf, size_t size) { qemu_put_buffer(f, (const uint8_t *)buf, size); } diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index 2e5a97d..9a65522 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -754,9 +754,6 @@ extern const VMStateInfo vmstate_info_bitmap; #define VMSTATE_UINT32_SUB_ARRAY(_f, _s, _start, _num) \ VMSTATE_SUB_ARRAY(_f, _s, _start, _num, 0, vmstate_info_uint32, uint32_t) -#define VMSTATE_UINT32_ARRAY(_f, _s, _n) \ - VMSTATE_UINT32_ARRAY_V(_f, _s, _n, 0) - #define VMSTATE_INT64_ARRAY_V(_f, _s, _n, _v) \ VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_int64, int64_t) diff --git a/include/qom/cpu.h b/include/qom/cpu.h index 302673d..9405554 100644 --- a/include/qom/cpu.h +++ b/include/qom/cpu.h @@ -321,6 +321,11 @@ struct CPUState { uint32_t can_do_io; int32_t exception_index; /* used by m68k TCG */ + /* Used to keep track of an outstanding cpu throttle thread for migration + * autoconverge + */ + bool throttle_thread_scheduled; + /* Note that this is accessed at the start of every TB via a negative offset from AREG0. Leave this field at the end so as to make the (absolute value) offset as small as possible. This reduces code @@ -565,6 +570,43 @@ CPUState *qemu_get_cpu(int index); */ bool cpu_exists(int64_t id); +/** + * cpu_throttle_set: + * @new_throttle_pct: Percent of sleep time. Valid range is 1 to 99. + * + * Throttles all vcpus by forcing them to sleep for the given percentage of + * time. A throttle_percentage of 25 corresponds to a 75% duty cycle roughly. + * (example: 10ms sleep for every 30ms awake). + * + * cpu_throttle_set can be called as needed to adjust new_throttle_pct. + * Once the throttling starts, it will remain in effect until cpu_throttle_stop + * is called. + */ +void cpu_throttle_set(int new_throttle_pct); + +/** + * cpu_throttle_stop: + * + * Stops the vcpu throttling started by cpu_throttle_set. + */ +void cpu_throttle_stop(void); + +/** + * cpu_throttle_active: + * + * Returns: %true if the vcpus are currently being throttled, %false otherwise. + */ +bool cpu_throttle_active(void); + +/** + * cpu_throttle_get_percentage: + * + * Returns the vcpu throttle percentage. See cpu_throttle_set for details. + * + * Returns: The throttle percentage in range 1 to 99. + */ +int cpu_throttle_get_percentage(void); + #ifndef CONFIG_USER_ONLY typedef void (*CPUInterruptHandler)(CPUState *, int); diff --git a/migration/migration.c b/migration/migration.c index 662e77e..b7de9b7 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -29,8 +29,9 @@ #include "trace.h" #include "qapi/util.h" #include "qapi-event.h" +#include "qom/cpu.h" -#define MAX_THROTTLE (32 << 20) /* Migration speed throttling */ +#define MAX_THROTTLE (32 << 20) /* Migration transfer speed throttling */ /* Amount of time to allocate to each "chunk" of bandwidth-throttled * data. */ @@ -44,6 +45,9 @@ #define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2 /*0: means nocompress, 1: best speed, ... 9: best compress ratio */ #define DEFAULT_MIGRATE_COMPRESS_LEVEL 1 +/* Define default autoconverge cpu throttle migration parameters */ +#define DEFAULT_MIGRATE_X_CPU_THROTTLE_INITIAL 20 +#define DEFAULT_MIGRATE_X_CPU_THROTTLE_INCREMENT 10 /* Migration XBZRLE default cache size */ #define DEFAULT_MIGRATE_CACHE_SIZE (64 * 1024 * 1024) @@ -71,6 +75,10 @@ MigrationState *migrate_get_current(void) DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT, .parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] = DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT, + .parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL] = + DEFAULT_MIGRATE_X_CPU_THROTTLE_INITIAL, + .parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT] = + DEFAULT_MIGRATE_X_CPU_THROTTLE_INCREMENT, }; return ¤t_migration; @@ -86,7 +94,7 @@ MigrationIncomingState *migration_incoming_get_current(void) MigrationIncomingState *migration_incoming_state_new(QEMUFile* f) { - mis_current = g_malloc0(sizeof(MigrationIncomingState)); + mis_current = g_new0(MigrationIncomingState, 1); mis_current->file = f; QLIST_INIT(&mis_current->loadvm_handlers); @@ -372,6 +380,10 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS]; params->decompress_threads = s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS]; + params->x_cpu_throttle_initial = + s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL]; + params->x_cpu_throttle_increment = + s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT]; return params; } @@ -435,6 +447,11 @@ MigrationInfo *qmp_query_migrate(Error **errp) info->disk->total = blk_mig_bytes_total(); } + if (cpu_throttle_active()) { + info->has_x_cpu_throttle_percentage = true; + info->x_cpu_throttle_percentage = cpu_throttle_get_percentage(); + } + get_xbzrle_cache_stats(info); break; case MIGRATION_STATUS_COMPLETED: @@ -494,7 +511,11 @@ void qmp_migrate_set_parameters(bool has_compress_level, bool has_compress_threads, int64_t compress_threads, bool has_decompress_threads, - int64_t decompress_threads, Error **errp) + int64_t decompress_threads, + bool has_x_cpu_throttle_initial, + int64_t x_cpu_throttle_initial, + bool has_x_cpu_throttle_increment, + int64_t x_cpu_throttle_increment, Error **errp) { MigrationState *s = migrate_get_current(); @@ -517,6 +538,18 @@ void qmp_migrate_set_parameters(bool has_compress_level, "is invalid, it should be in the range of 1 to 255"); return; } + if (has_x_cpu_throttle_initial && + (x_cpu_throttle_initial < 1 || x_cpu_throttle_initial > 99)) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, + "x_cpu_throttle_initial", + "an integer in the range of 1 to 99"); + } + if (has_x_cpu_throttle_increment && + (x_cpu_throttle_increment < 1 || x_cpu_throttle_increment > 99)) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, + "x_cpu_throttle_increment", + "an integer in the range of 1 to 99"); + } if (has_compress_level) { s->parameters[MIGRATION_PARAMETER_COMPRESS_LEVEL] = compress_level; @@ -528,6 +561,15 @@ void qmp_migrate_set_parameters(bool has_compress_level, s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] = decompress_threads; } + if (has_x_cpu_throttle_initial) { + s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL] = + x_cpu_throttle_initial; + } + + if (has_x_cpu_throttle_increment) { + s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT] = + x_cpu_throttle_increment; + } } /* shared migration helpers */ @@ -643,6 +685,10 @@ static MigrationState *migrate_init(const MigrationParams *params) s->parameters[MIGRATION_PARAMETER_COMPRESS_THREADS]; int decompress_thread_count = s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS]; + int x_cpu_throttle_initial = + s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL]; + int x_cpu_throttle_increment = + s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT]; memcpy(enabled_capabilities, s->enabled_capabilities, sizeof(enabled_capabilities)); @@ -658,6 +704,10 @@ static MigrationState *migrate_init(const MigrationParams *params) compress_thread_count; s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] = decompress_thread_count; + s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL] = + x_cpu_throttle_initial; + s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT] = + x_cpu_throttle_increment; s->bandwidth_limit = bandwidth_limit; migrate_set_state(s, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP); @@ -913,6 +963,50 @@ int64_t migrate_xbzrle_cache_size(void) return s->xbzrle_cache_size; } +/** + * migration_completion: Used by migration_thread when there's not much left. + * The caller 'breaks' the loop when this returns. + * + * @s: Current migration state + * @*old_vm_running: Pointer to old_vm_running flag + * @*start_time: Pointer to time to update + */ +static void migration_completion(MigrationState *s, bool *old_vm_running, + int64_t *start_time) +{ + int ret; + + qemu_mutex_lock_iothread(); + *start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER); + *old_vm_running = runstate_is_running(); + + ret = global_state_store(); + if (!ret) { + ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); + if (ret >= 0) { + qemu_file_set_rate_limit(s->file, INT64_MAX); + qemu_savevm_state_complete(s->file); + } + } + qemu_mutex_unlock_iothread(); + + if (ret < 0) { + goto fail; + } + + if (qemu_file_get_error(s->file)) { + trace_migration_completion_file_err(); + goto fail; + } + + migrate_set_state(s, MIGRATION_STATUS_ACTIVE, MIGRATION_STATUS_COMPLETED); + return; + +fail: + migrate_set_state(s, MIGRATION_STATUS_ACTIVE, MIGRATION_STATUS_FAILED); +} + /* migration thread support */ static void *migration_thread(void *opaque) @@ -943,34 +1037,9 @@ static void *migration_thread(void *opaque) if (pending_size && pending_size >= max_size) { qemu_savevm_state_iterate(s->file); } else { - int ret; - - qemu_mutex_lock_iothread(); - start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); - qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER); - old_vm_running = runstate_is_running(); - - ret = global_state_store(); - if (!ret) { - ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); - if (ret >= 0) { - qemu_file_set_rate_limit(s->file, INT64_MAX); - qemu_savevm_state_complete(s->file); - } - } - qemu_mutex_unlock_iothread(); - - if (ret < 0) { - migrate_set_state(s, MIGRATION_STATUS_ACTIVE, - MIGRATION_STATUS_FAILED); - break; - } - - if (!qemu_file_get_error(s->file)) { - migrate_set_state(s, MIGRATION_STATUS_ACTIVE, - MIGRATION_STATUS_COMPLETED); - break; - } + trace_migration_thread_low_pending(pending_size); + migration_completion(s, &old_vm_running, &start_time); + break; } } @@ -1007,6 +1076,9 @@ static void *migration_thread(void *opaque) } } + /* If we enabled cpu throttling for auto-converge, turn it off. */ + cpu_throttle_stop(); + qemu_mutex_lock_iothread(); if (s->state == MIGRATION_STATUS_COMPLETED) { int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); diff --git a/migration/qemu-file-buf.c b/migration/qemu-file-buf.c index 2de9330..e3fd085 100644 --- a/migration/qemu-file-buf.c +++ b/migration/qemu-file-buf.c @@ -372,7 +372,8 @@ typedef struct QEMUBuffer { bool qsb_allocated; } QEMUBuffer; -static int buf_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size) +static ssize_t buf_get_buffer(void *opaque, uint8_t *buf, int64_t pos, + size_t size) { QEMUBuffer *s = opaque; ssize_t len = qsb_get_length(s->qsb) - pos; @@ -387,8 +388,8 @@ static int buf_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size) return qsb_get_buffer(s->qsb, pos, len, buf); } -static int buf_put_buffer(void *opaque, const uint8_t *buf, - int64_t pos, int size) +static ssize_t buf_put_buffer(void *opaque, const uint8_t *buf, + int64_t pos, size_t size) { QEMUBuffer *s = opaque; @@ -439,7 +440,7 @@ QEMUFile *qemu_bufopen(const char *mode, QEMUSizedBuffer *input) return NULL; } - s = g_malloc0(sizeof(QEMUBuffer)); + s = g_new0(QEMUBuffer, 1); s->qsb = input; if (s->qsb == NULL) { diff --git a/migration/qemu-file-stdio.c b/migration/qemu-file-stdio.c index 285068b..889ffb3 100644 --- a/migration/qemu-file-stdio.c +++ b/migration/qemu-file-stdio.c @@ -37,11 +37,11 @@ static int stdio_get_fd(void *opaque) return fileno(s->stdio_file); } -static int stdio_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, - int size) +static ssize_t stdio_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, + size_t size) { QEMUFileStdio *s = opaque; - int res; + size_t res; res = fwrite(buf, 1, size, s->stdio_file); @@ -51,11 +51,12 @@ static int stdio_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, return res; } -static int stdio_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size) +static ssize_t stdio_get_buffer(void *opaque, uint8_t *buf, int64_t pos, + size_t size) { QEMUFileStdio *s = opaque; FILE *fp = s->stdio_file; - int bytes; + ssize_t bytes; for (;;) { clearerr(fp); @@ -143,7 +144,7 @@ QEMUFile *qemu_popen_cmd(const char *command, const char *mode) return NULL; } - s = g_malloc0(sizeof(QEMUFileStdio)); + s = g_new0(QEMUFileStdio, 1); s->stdio_file = stdio_file; @@ -175,7 +176,7 @@ QEMUFile *qemu_fopen(const char *filename, const char *mode) return NULL; } - s = g_malloc0(sizeof(QEMUFileStdio)); + s = g_new0(QEMUFileStdio, 1); s->stdio_file = fopen(filename, mode); if (!s->stdio_file) { diff --git a/migration/qemu-file-unix.c b/migration/qemu-file-unix.c index bfbc086..bf7a0e4 100644 --- a/migration/qemu-file-unix.c +++ b/migration/qemu-file-unix.c @@ -54,7 +54,8 @@ static int socket_get_fd(void *opaque) return s->fd; } -static int socket_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size) +static ssize_t socket_get_buffer(void *opaque, uint8_t *buf, int64_t pos, + size_t size) { QEMUFileSocket *s = opaque; ssize_t len; @@ -138,7 +139,8 @@ static ssize_t unix_writev_buffer(void *opaque, struct iovec *iov, int iovcnt, return total; } -static int unix_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size) +static ssize_t unix_get_buffer(void *opaque, uint8_t *buf, int64_t pos, + size_t size) { QEMUFileSocket *s = opaque; ssize_t len; @@ -192,7 +194,7 @@ QEMUFile *qemu_fdopen(int fd, const char *mode) return NULL; } - s = g_malloc0(sizeof(QEMUFileSocket)); + s = g_new0(QEMUFileSocket, 1); s->fd = fd; if (mode[0] == 'r') { @@ -226,7 +228,7 @@ QEMUFile *qemu_fopen_socket(int fd, const char *mode) return NULL; } - s = g_malloc0(sizeof(QEMUFileSocket)); + s = g_new0(QEMUFileSocket, 1); s->fd = fd; if (mode[0] == 'w') { qemu_set_block(s->fd); diff --git a/migration/qemu-file.c b/migration/qemu-file.c index 6bb3dc1..49addf6 100644 --- a/migration/qemu-file.c +++ b/migration/qemu-file.c @@ -60,7 +60,7 @@ QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops) { QEMUFile *f; - f = g_malloc0(sizeof(QEMUFile)); + f = g_new0(QEMUFile, 1); f->opaque = opaque; f->ops = ops; @@ -270,7 +270,7 @@ int qemu_fclose(QEMUFile *f) return ret; } -static void add_to_iovec(QEMUFile *f, const uint8_t *buf, int size) +static void add_to_iovec(QEMUFile *f, const uint8_t *buf, size_t size) { /* check for adjacent buffer and coalesce them */ if (f->iovcnt > 0 && buf == f->iov[f->iovcnt - 1].iov_base + @@ -286,7 +286,7 @@ static void add_to_iovec(QEMUFile *f, const uint8_t *buf, int size) } } -void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, int size) +void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, size_t size) { if (!f->ops->writev_buffer) { qemu_put_buffer(f, buf, size); @@ -301,9 +301,9 @@ void qemu_put_buffer_async(QEMUFile *f, const uint8_t *buf, int size) add_to_iovec(f, buf, size); } -void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size) +void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, size_t size) { - int l; + size_t l; if (f->last_error) { return; @@ -363,10 +363,10 @@ void qemu_file_skip(QEMUFile *f, int size) * return as many as it managed to read (assuming blocking fd's which * all current QEMUFile are) */ -int qemu_peek_buffer(QEMUFile *f, uint8_t **buf, int size, size_t offset) +size_t qemu_peek_buffer(QEMUFile *f, uint8_t **buf, size_t size, size_t offset) { - int pending; - int index; + ssize_t pending; + size_t index; assert(!qemu_file_is_writable(f)); assert(offset < IO_BUF_SIZE); @@ -411,13 +411,13 @@ int qemu_peek_buffer(QEMUFile *f, uint8_t **buf, int size, size_t offset) * return as many as it managed to read (assuming blocking fd's which * all current QEMUFile are) */ -int qemu_get_buffer(QEMUFile *f, uint8_t *buf, int size) +size_t qemu_get_buffer(QEMUFile *f, uint8_t *buf, size_t size) { - int pending = size; - int done = 0; + size_t pending = size; + size_t done = 0; while (pending > 0) { - int res; + size_t res; uint8_t *src; res = qemu_peek_buffer(f, &src, MIN(pending, IO_BUF_SIZE), 0); diff --git a/migration/ram.c b/migration/ram.c index 7f007e6..2d1d0b9 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -47,9 +47,7 @@ do { } while (0) #endif -static bool mig_throttle_on; static int dirty_rate_high_cnt; -static void check_guest_throttling(void); static uint64_t bitmap_sync_count; @@ -227,6 +225,17 @@ static uint64_t migration_dirty_pages; static uint32_t last_version; static bool ram_bulk_stage; +/* used by the search for pages to send */ +struct PageSearchStatus { + /* Current block being searched */ + RAMBlock *block; + /* Current offset to search from */ + ram_addr_t offset; + /* Set once we wrap around */ + bool complete_round; +}; +typedef struct PageSearchStatus PageSearchStatus; + struct CompressParam { bool start; bool done; @@ -396,6 +405,29 @@ static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset) return size; } +/* Reduce amount of guest cpu execution to hopefully slow down memory writes. + * If guest dirty memory rate is reduced below the rate at which we can + * transfer pages to the destination then we should be able to complete + * migration. Some workloads dirty memory way too fast and will not effectively + * converge, even with auto-converge. + */ +static void mig_throttle_guest_down(void) +{ + MigrationState *s = migrate_get_current(); + uint64_t pct_initial = + s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INITIAL]; + uint64_t pct_icrement = + s->parameters[MIGRATION_PARAMETER_X_CPU_THROTTLE_INCREMENT]; + + /* We have not started throttling yet. Let's start it. */ + if (!cpu_throttle_active()) { + cpu_throttle_set(pct_initial); + } else { + /* Throttling already on, just increase the rate */ + cpu_throttle_set(cpu_throttle_get_percentage() + pct_icrement); + } +} + /* Update the xbzrle cache to reflect a page that's been sent as all 0. * The important thing is that a stale (not-yet-0'd) page be replaced * by the new data. @@ -497,13 +529,13 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data, /* Called with rcu_read_lock() to protect migration_bitmap */ static inline -ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr, +ram_addr_t migration_bitmap_find_and_reset_dirty(RAMBlock *rb, ram_addr_t start) { - unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS; + unsigned long base = rb->offset >> TARGET_PAGE_BITS; unsigned long nr = base + (start >> TARGET_PAGE_BITS); - uint64_t mr_size = TARGET_PAGE_ALIGN(memory_region_size(mr)); - unsigned long size = base + (mr_size >> TARGET_PAGE_BITS); + uint64_t rb_size = rb->used_length; + unsigned long size = base + (rb_size >> TARGET_PAGE_BITS); unsigned long *bitmap; unsigned long next; @@ -531,7 +563,6 @@ static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length) cpu_physical_memory_sync_dirty_bitmap(bitmap, start, length); } - /* Fix me: there are too many global variables used in migration process. */ static int64_t start_time; static int64_t bytes_xfer_prev; @@ -573,7 +604,7 @@ static void migration_bitmap_sync(void) qemu_mutex_lock(&migration_bitmap_mutex); rcu_read_lock(); QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { - migration_bitmap_sync_range(block->mr->ram_addr, block->used_length); + migration_bitmap_sync_range(block->offset, block->used_length); } rcu_read_unlock(); qemu_mutex_unlock(&migration_bitmap_mutex); @@ -589,21 +620,21 @@ static void migration_bitmap_sync(void) /* The following detection logic can be refined later. For now: Check to see if the dirtied bytes is 50% more than the approx. amount of bytes that just got transferred since the last time we - were in this routine. If that happens >N times (for now N==4) - we turn on the throttle down logic */ + were in this routine. If that happens twice, start or increase + throttling */ bytes_xfer_now = ram_bytes_transferred(); + if (s->dirty_pages_rate && (num_dirty_pages_period * TARGET_PAGE_SIZE > (bytes_xfer_now - bytes_xfer_prev)/2) && - (dirty_rate_high_cnt++ > 4)) { + (dirty_rate_high_cnt++ >= 2)) { trace_migration_throttle(); - mig_throttle_on = true; dirty_rate_high_cnt = 0; + mig_throttle_guest_down(); } bytes_xfer_prev = bytes_xfer_now; - } else { - mig_throttle_on = false; } + if (migrate_use_xbzrle()) { if (iterations_prev != acct_info.iterations) { acct_info.xbzrle_cache_miss_rate = @@ -668,12 +699,11 @@ static int ram_save_page(QEMUFile *f, RAMBlock* block, ram_addr_t offset, int pages = -1; uint64_t bytes_xmit; ram_addr_t current_addr; - MemoryRegion *mr = block->mr; uint8_t *p; int ret; bool send_async = true; - p = memory_region_get_ram_ptr(mr) + offset; + p = block->host + offset; /* In doubt sent page as normal */ bytes_xmit = 0; @@ -744,7 +774,7 @@ static int do_compress_ram_page(CompressParam *param) RAMBlock *block = param->block; ram_addr_t offset = param->offset; - p = memory_region_get_ram_ptr(block->mr) + (offset & TARGET_PAGE_MASK); + p = block->host + (offset & TARGET_PAGE_MASK); bytes_sent = save_page_header(param->file, block, offset | RAM_SAVE_FLAG_COMPRESS_PAGE); @@ -852,11 +882,10 @@ static int ram_save_compressed_page(QEMUFile *f, RAMBlock *block, { int pages = -1; uint64_t bytes_xmit; - MemoryRegion *mr = block->mr; uint8_t *p; int ret; - p = memory_region_get_ram_ptr(mr) + offset; + p = block->host + offset; bytes_xmit = 0; ret = ram_control_save_page(f, block->offset, @@ -909,6 +938,59 @@ static int ram_save_compressed_page(QEMUFile *f, RAMBlock *block, return pages; } +/* + * Find the next dirty page and update any state associated with + * the search process. + * + * Returns: True if a page is found + * + * @f: Current migration stream. + * @pss: Data about the state of the current dirty page scan. + * @*again: Set to false if the search has scanned the whole of RAM + */ +static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss, + bool *again) +{ + pss->offset = migration_bitmap_find_and_reset_dirty(pss->block, + pss->offset); + if (pss->complete_round && pss->block == last_seen_block && + pss->offset >= last_offset) { + /* + * We've been once around the RAM and haven't found anything. + * Give up. + */ + *again = false; + return false; + } + if (pss->offset >= pss->block->used_length) { + /* Didn't find anything in this RAM Block */ + pss->offset = 0; + pss->block = QLIST_NEXT_RCU(pss->block, next); + if (!pss->block) { + /* Hit the end of the list */ + pss->block = QLIST_FIRST_RCU(&ram_list.blocks); + /* Flag that we've looped */ + pss->complete_round = true; + ram_bulk_stage = false; + if (migrate_use_xbzrle()) { + /* If xbzrle is on, stop using the data compression at this + * point. In theory, xbzrle can do better than compression. + */ + flush_compressed_data(f); + compression_switch = false; + } + } + /* Didn't find anything this time, but try again on the new block */ + *again = true; + return false; + } else { + /* Can go around again, but... */ + *again = true; + /* We've found something so probably don't need to */ + return true; + } +} + /** * ram_find_and_save_block: Finds a dirty page and sends it to f * @@ -925,56 +1007,40 @@ static int ram_save_compressed_page(QEMUFile *f, RAMBlock *block, static int ram_find_and_save_block(QEMUFile *f, bool last_stage, uint64_t *bytes_transferred) { - RAMBlock *block = last_seen_block; - ram_addr_t offset = last_offset; - bool complete_round = false; + PageSearchStatus pss; int pages = 0; - MemoryRegion *mr; + bool again, found; - if (!block) - block = QLIST_FIRST_RCU(&ram_list.blocks); + pss.block = last_seen_block; + pss.offset = last_offset; + pss.complete_round = false; - while (true) { - mr = block->mr; - offset = migration_bitmap_find_and_reset_dirty(mr, offset); - if (complete_round && block == last_seen_block && - offset >= last_offset) { - break; - } - if (offset >= block->used_length) { - offset = 0; - block = QLIST_NEXT_RCU(block, next); - if (!block) { - block = QLIST_FIRST_RCU(&ram_list.blocks); - complete_round = true; - ram_bulk_stage = false; - if (migrate_use_xbzrle()) { - /* If xbzrle is on, stop using the data compression at this - * point. In theory, xbzrle can do better than compression. - */ - flush_compressed_data(f); - compression_switch = false; - } - } - } else { + if (!pss.block) { + pss.block = QLIST_FIRST_RCU(&ram_list.blocks); + } + + do { + found = find_dirty_block(f, &pss, &again); + + if (found) { if (compression_switch && migrate_use_compression()) { - pages = ram_save_compressed_page(f, block, offset, last_stage, + pages = ram_save_compressed_page(f, pss.block, pss.offset, + last_stage, bytes_transferred); } else { - pages = ram_save_page(f, block, offset, last_stage, + pages = ram_save_page(f, pss.block, pss.offset, last_stage, bytes_transferred); } /* if page is unmodified, continue to the next */ if (pages > 0) { - last_sent_block = block; - break; + last_sent_block = pss.block; } } - } + } while (!pages && again); - last_seen_block = block; - last_offset = offset; + last_seen_block = pss.block; + last_offset = pss.offset; return pages; } @@ -1101,7 +1167,6 @@ static int ram_save_setup(QEMUFile *f, void *opaque) RAMBlock *block; int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */ - mig_throttle_on = false; dirty_rate_high_cnt = 0; bitmap_sync_count = 0; migration_bitmap_sync_init(); @@ -1206,7 +1271,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) } pages_sent += pages; acct_info.iterations++; - check_guest_throttling(); + /* we want to check in the 1st loop, just in case it was the 1st time and we had to sync the dirty bitmap. qemu_get_clock_ns() is a bit expensive, so we only check each some @@ -1344,7 +1409,7 @@ static inline void *host_from_stream_offset(QEMUFile *f, return NULL; } - return memory_region_get_ram_ptr(block->mr) + offset; + return block->host + offset; } len = qemu_get_byte(f); @@ -1354,7 +1419,7 @@ static inline void *host_from_stream_offset(QEMUFile *f, QLIST_FOREACH_RCU(block, &ram_list.blocks, next) { if (!strncmp(id, block->idstr, sizeof(id)) && block->max_length > offset) { - return memory_region_get_ram_ptr(block->mr) + offset; + return block->host + offset; } } @@ -1619,52 +1684,3 @@ void ram_mig_init(void) qemu_mutex_init(&XBZRLE.lock); register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL); } -/* Stub function that's gets run on the vcpu when its brought out of the - VM to run inside qemu via async_run_on_cpu()*/ - -static void mig_sleep_cpu(void *opq) -{ - qemu_mutex_unlock_iothread(); - g_usleep(30*1000); - qemu_mutex_lock_iothread(); -} - -/* To reduce the dirty rate explicitly disallow the VCPUs from spending - much time in the VM. The migration thread will try to catchup. - Workload will experience a performance drop. -*/ -static void mig_throttle_guest_down(void) -{ - CPUState *cpu; - - qemu_mutex_lock_iothread(); - CPU_FOREACH(cpu) { - async_run_on_cpu(cpu, mig_sleep_cpu, NULL); - } - qemu_mutex_unlock_iothread(); -} - -static void check_guest_throttling(void) -{ - static int64_t t0; - int64_t t1; - - if (!mig_throttle_on) { - return; - } - - if (!t0) { - t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); - return; - } - - t1 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); - - /* If it has been more than 40 ms since the last time the guest - * was throttled then do it again. - */ - if (40 < (t1-t0)/1000000) { - mig_throttle_guest_down(); - t0 = t1; - } -} diff --git a/migration/rdma.c b/migration/rdma.c index 9424834..7a7176f 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -541,7 +541,7 @@ static int rdma_add_block(RDMAContext *rdma, const char *block_name, RDMALocalBlock *block; RDMALocalBlock *old = local->block; - local->block = g_malloc0(sizeof(RDMALocalBlock) * (local->nb_blocks + 1)); + local->block = g_new0(RDMALocalBlock, local->nb_blocks + 1); if (local->nb_blocks) { int x; @@ -572,7 +572,7 @@ static int rdma_add_block(RDMAContext *rdma, const char *block_name, bitmap_clear(block->transit_bitmap, 0, block->nb_chunks); block->unregister_bitmap = bitmap_new(block->nb_chunks); bitmap_clear(block->unregister_bitmap, 0, block->nb_chunks); - block->remote_keys = g_malloc0(block->nb_chunks * sizeof(uint32_t)); + block->remote_keys = g_new0(uint32_t, block->nb_chunks); block->is_ram_block = local->init ? false : true; @@ -617,8 +617,8 @@ static int qemu_rdma_init_ram_blocks(RDMAContext *rdma) memset(local, 0, sizeof *local); qemu_ram_foreach_block(qemu_rdma_init_one_block, rdma); trace_qemu_rdma_init_ram_blocks(local->nb_blocks); - rdma->dest_blocks = (RDMADestBlock *) g_malloc0(sizeof(RDMADestBlock) * - rdma->local_ram_blocks.nb_blocks); + rdma->dest_blocks = g_new0(RDMADestBlock, + rdma->local_ram_blocks.nb_blocks); local->init = true; return 0; } @@ -677,8 +677,7 @@ static int rdma_delete_block(RDMAContext *rdma, RDMALocalBlock *block) if (local->nb_blocks > 1) { - local->block = g_malloc0(sizeof(RDMALocalBlock) * - (local->nb_blocks - 1)); + local->block = g_new0(RDMALocalBlock, local->nb_blocks - 1); if (block->index) { memcpy(local->block, old, sizeof(RDMALocalBlock) * block->index); @@ -1164,7 +1163,7 @@ static int qemu_rdma_register_and_get_keys(RDMAContext *rdma, /* allocate memory to store chunk MRs */ if (!block->pmr) { - block->pmr = g_malloc0(block->nb_chunks * sizeof(struct ibv_mr *)); + block->pmr = g_new0(struct ibv_mr *, block->nb_chunks); } /* @@ -2494,7 +2493,7 @@ static void *qemu_rdma_data_init(const char *host_port, Error **errp) InetSocketAddress *addr; if (host_port) { - rdma = g_malloc0(sizeof(RDMAContext)); + rdma = g_new0(RDMAContext, 1); rdma->current_index = -1; rdma->current_chunk = -1; @@ -2519,8 +2518,8 @@ static void *qemu_rdma_data_init(const char *host_port, Error **errp) * SEND messages for control only. * VM's ram is handled with regular RDMA messages. */ -static int qemu_rdma_put_buffer(void *opaque, const uint8_t *buf, - int64_t pos, int size) +static ssize_t qemu_rdma_put_buffer(void *opaque, const uint8_t *buf, + int64_t pos, size_t size) { QEMUFileRDMA *r = opaque; QEMUFile *f = r->file; @@ -2547,7 +2546,8 @@ static int qemu_rdma_put_buffer(void *opaque, const uint8_t *buf, r->len = MIN(remaining, RDMA_SEND_INCREMENT); remaining -= r->len; - head.len = r->len; + /* Guaranteed to fit due to RDMA_SEND_INCREMENT MIN above */ + head.len = (uint32_t)r->len; head.type = RDMA_CONTROL_QEMU_FILE; ret = qemu_rdma_exchange_send(rdma, &head, data, NULL, NULL, NULL); @@ -2564,7 +2564,7 @@ static int qemu_rdma_put_buffer(void *opaque, const uint8_t *buf, } static size_t qemu_rdma_fill(RDMAContext *rdma, uint8_t *buf, - int size, int idx) + size_t size, int idx) { size_t len = 0; @@ -2585,8 +2585,8 @@ static size_t qemu_rdma_fill(RDMAContext *rdma, uint8_t *buf, * RDMA links don't use bytestreams, so we have to * return bytes to QEMUFile opportunistically. */ -static int qemu_rdma_get_buffer(void *opaque, uint8_t *buf, - int64_t pos, int size) +static ssize_t qemu_rdma_get_buffer(void *opaque, uint8_t *buf, + int64_t pos, size_t size) { QEMUFileRDMA *r = opaque; RDMAContext *rdma = r->rdma; @@ -3399,7 +3399,7 @@ static void *qemu_fopen_rdma(RDMAContext *rdma, const char *mode) return NULL; } - r = g_malloc0(sizeof(QEMUFileRDMA)); + r = g_new0(QEMUFileRDMA, 1); r->rdma = rdma; if (mode[0] == 'w') { diff --git a/migration/savevm.c b/migration/savevm.c index 33e55fe..dbcc39a 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -138,14 +138,15 @@ static ssize_t block_writev_buffer(void *opaque, struct iovec *iov, int iovcnt, return qiov.size; } -static int block_put_buffer(void *opaque, const uint8_t *buf, - int64_t pos, int size) +static ssize_t block_put_buffer(void *opaque, const uint8_t *buf, + int64_t pos, size_t size) { bdrv_save_vmstate(opaque, buf, pos, size); return size; } -static int block_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size) +static ssize_t block_get_buffer(void *opaque, uint8_t *buf, int64_t pos, + size_t size) { return bdrv_load_vmstate(opaque, buf, pos, size); } @@ -480,7 +481,7 @@ int register_savevm_live(DeviceState *dev, { SaveStateEntry *se; - se = g_malloc0(sizeof(SaveStateEntry)); + se = g_new0(SaveStateEntry, 1); se->version_id = version_id; se->section_id = savevm_state.global_section_id++; se->ops = ops; @@ -498,7 +499,7 @@ int register_savevm_live(DeviceState *dev, pstrcat(se->idstr, sizeof(se->idstr), "/"); g_free(id); - se->compat = g_malloc0(sizeof(CompatEntry)); + se->compat = g_new0(CompatEntry, 1); pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), idstr); se->compat->instance_id = instance_id == -1 ? calculate_compat_instance_id(idstr) : instance_id; @@ -526,7 +527,7 @@ int register_savevm(DeviceState *dev, LoadStateHandler *load_state, void *opaque) { - SaveVMHandlers *ops = g_malloc0(sizeof(SaveVMHandlers)); + SaveVMHandlers *ops = g_new0(SaveVMHandlers, 1); ops->save_state = save_state; ops->load_state = load_state; return register_savevm_live(dev, idstr, instance_id, version_id, @@ -568,7 +569,7 @@ int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, /* If this triggers, alias support can be dropped for the vmsd. */ assert(alias_id == -1 || required_for_version >= vmsd->minimum_version_id); - se = g_malloc0(sizeof(SaveStateEntry)); + se = g_new0(SaveStateEntry, 1); se->version_id = vmsd->version_id; se->section_id = savevm_state.global_section_id++; se->opaque = opaque; @@ -582,7 +583,7 @@ int vmstate_register_with_alias_id(DeviceState *dev, int instance_id, pstrcat(se->idstr, sizeof(se->idstr), "/"); g_free(id); - se->compat = g_malloc0(sizeof(CompatEntry)); + se->compat = g_new0(CompatEntry, 1); pstrcpy(se->compat->idstr, sizeof(se->compat->idstr), vmsd->name); se->compat->instance_id = instance_id == -1 ? calculate_compat_instance_id(vmsd->name) : instance_id; @@ -1544,7 +1545,7 @@ void hmp_info_snapshots(Monitor *mon, const QDict *qdict) return; } - available_snapshots = g_malloc0(sizeof(int) * nb_sns); + available_snapshots = g_new0(int, nb_sns); total = 0; for (i = 0; i < nb_sns; i++) { sn = &sn_tab[i]; diff --git a/qapi-schema.json b/qapi-schema.json index 582a817..8b0520c 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -480,6 +480,10 @@ # may be expensive, but do not actually occur during the iterative # migration rounds themselves. (since 1.6) # +# @x-cpu-throttle-percentage: #optional percentage of time guest cpus are being +# throttled during auto-converge. This is only present when auto-converge +# has started throttling guest cpus. (Since 2.5) +# # Since: 0.14.0 ## { 'struct': 'MigrationInfo', @@ -489,7 +493,8 @@ '*total-time': 'int', '*expected-downtime': 'int', '*downtime': 'int', - '*setup-time': 'int'} } + '*setup-time': 'int', + '*x-cpu-throttle-percentage': 'int'} } ## # @query-migrate @@ -596,10 +601,18 @@ # compression, so set the decompress-threads to the number about 1/4 # of compress-threads is adequate. # +# @x-cpu-throttle-initial: Initial percentage of time guest cpus are throttled +# when migration auto-converge is activated. The +# default value is 20. (Since 2.5) +# +# @x-cpu-throttle-increment: throttle percentage increase each time +# auto-converge detects that migration is not making +# progress. The default value is 10. (Since 2.5) # Since: 2.4 ## { 'enum': 'MigrationParameter', - 'data': ['compress-level', 'compress-threads', 'decompress-threads'] } + 'data': ['compress-level', 'compress-threads', 'decompress-threads', + 'x-cpu-throttle-initial', 'x-cpu-throttle-increment'] } # # @migrate-set-parameters @@ -612,12 +625,21 @@ # # @decompress-threads: decompression thread count # +# @x-cpu-throttle-initial: Initial percentage of time guest cpus are throttled +# when migration auto-converge is activated. The +# default value is 20. (Since 2.5) +# +# @x-cpu-throttle-increment: throttle percentage increase each time +# auto-converge detects that migration is not making +# progress. The default value is 10. (Since 2.5) # Since: 2.4 ## { 'command': 'migrate-set-parameters', 'data': { '*compress-level': 'int', '*compress-threads': 'int', - '*decompress-threads': 'int'} } + '*decompress-threads': 'int', + '*x-cpu-throttle-initial': 'int', + '*x-cpu-throttle-increment': 'int'} } # # @MigrationParameters @@ -628,12 +650,22 @@ # # @decompress-threads: decompression thread count # +# @x-cpu-throttle-initial: Initial percentage of time guest cpus are throttled +# when migration auto-converge is activated. The +# default value is 20. (Since 2.5) +# +# @x-cpu-throttle-increment: throttle percentage increase each time +# auto-converge detects that migration is not making +# progress. The default value is 10. (Since 2.5) +# # Since: 2.4 ## { 'struct': 'MigrationParameters', 'data': { 'compress-level': 'int', 'compress-threads': 'int', - 'decompress-threads': 'int'} } + 'decompress-threads': 'int', + 'x-cpu-throttle-initial': 'int', + 'x-cpu-throttle-increment': 'int'} } ## # @query-migrate-parameters # @@ -657,6 +657,7 @@ void qtest_init(const char *qtest_chrdev, const char *qtest_log, Error **errp) inbuf = g_string_new(""); qtest_chr = chr; + page_size_init(); } bool qtest_driver(void) diff --git a/target-microblaze/cpu.c b/target-microblaze/cpu.c index 9ac509a..cbd84a2 100644 --- a/target-microblaze/cpu.c +++ b/target-microblaze/cpu.c @@ -107,6 +107,8 @@ static void mb_cpu_reset(CPUState *s) /* Disable stack protector. */ env->shr = ~0; + env->sregs[SR_PC] = cpu->cfg.base_vectors; + #if defined(CONFIG_USER_ONLY) /* start in user mode with interrupts enabled. */ env->sregs[SR_MSR] = MSR_EE | MSR_IE | MSR_VM | MSR_UM; @@ -183,8 +185,6 @@ static void mb_cpu_realizefn(DeviceState *dev, Error **errp) env->pvr.regs[10] = 0x0c000000; /* Default to spartan 3a dsp family. */ env->pvr.regs[11] = PVR11_USE_MMU | (16 << 17); - env->sregs[SR_PC] = cpu->cfg.base_vectors; - mcc->parent_realize(dev, errp); } diff --git a/tests/qemu-iotests/049.out b/tests/qemu-iotests/049.out index 0425ae0..a2b6703 100644 --- a/tests/qemu-iotests/049.out +++ b/tests/qemu-iotests/049.out @@ -118,6 +118,7 @@ qemu-img: kilobytes, megabytes, gigabytes, terabytes, petabytes and exabytes. qemu-img create -f qcow2 -o size=foobar TEST_DIR/t.qcow2 qemu-img: Parameter 'size' expects a size +You may use k, M, G or T suffixes for kilobytes, megabytes, gigabytes and terabytes. qemu-img: TEST_DIR/t.qcow2: Invalid options for file format 'qcow2' == Check correct interpretation of suffixes for cluster size == diff --git a/tests/qemu-iotests/128 b/tests/qemu-iotests/128 index e2a0f2f..3d8107d 100755 --- a/tests/qemu-iotests/128 +++ b/tests/qemu-iotests/128 @@ -31,6 +31,11 @@ status=1 # failure is the default! devname="eiodev$$" sudo="" +_sudo_qemu_io_wrapper() +{ + (exec $sudo "$QEMU_IO_PROG" $QEMU_IO_OPTIONS "$@") +} + _setup_eiodev() { # This test should either be run as root or with passwordless sudo @@ -76,7 +81,9 @@ TEST_IMG="/dev/mapper/$devname" echo echo "== reading from error device ==" # Opening image should succeed but the read operation should fail -$sudo $QEMU_IO --format "$IMGFMT" --nocache -c "read 0 65536" "$TEST_IMG" | _filter_qemu_io +_sudo_qemu_io_wrapper --format "$IMGFMT" --nocache \ + -c "read 0 65536" "$TEST_IMG" \ + | _filter_qemu_io # success, all done echo "*** done" diff --git a/trace-events b/trace-events index a70ea9c..36db793 100644 --- a/trace-events +++ b/trace-events @@ -1420,6 +1420,8 @@ migrate_transferred(uint64_t tranferred, uint64_t time_spent, double bandwidth, migrate_state_too_big(void) "" migrate_global_state_post_load(const char *state) "loaded state: %s" migrate_global_state_pre_save(const char *state) "saved state: %s" +migration_completion_file_err(void) "" +migration_thread_low_pending(uint64_t pending) "%" PRIu64 # migration/rdma.c qemu_rdma_accept_incoming_migration(void) "" @@ -1440,7 +1442,7 @@ qemu_rdma_exchange_get_response_none(const char *desc, int type) "Surprise: got qemu_rdma_exchange_send_issue_callback(void) "" qemu_rdma_exchange_send_waiting(const char *desc) "Waiting for response %s" qemu_rdma_exchange_send_received(const char *desc) "Response %s received." -qemu_rdma_fill(int64_t control_len, int size) "RDMA %" PRId64 " of %d bytes already in buffer" +qemu_rdma_fill(size_t control_len, size_t size) "RDMA %zd of %zd bytes already in buffer" qemu_rdma_init_ram_blocks(int blocks) "Allocated %d local ram block structures" qemu_rdma_poll_recv(const char *compstr, int64_t comp, int64_t id, int sent) "completion %s #%" PRId64 " received (%" PRId64 ") left %d" qemu_rdma_poll_write(const char *compstr, int64_t comp, int left, uint64_t block, uint64_t chunk, void *local, void *remote) "completions %s (%" PRId64 ") left %d, block %" PRIu64 ", chunk: %" PRIu64 " %p %p" @@ -304,6 +304,7 @@ static void handleAnyDeviceErrors(Error * err) - (float) cdx; - (float) cdy; - (QEMUScreen) gscreen; +- (void) raiseAllKeys; @end QemuCocoaView *cocoaView; @@ -798,6 +799,24 @@ QemuCocoaView *cocoaView; - (float) cdx {return cdx;} - (float) cdy {return cdy;} - (QEMUScreen) gscreen {return screen;} + +/* + * Makes the target think all down keys are being released. + * This prevents a stuck key problem, since we will not see + * key up events for those keys after we have lost focus. + */ +- (void) raiseAllKeys +{ + int index; + const int max_index = ARRAY_SIZE(modifiers_state); + + for (index = 0; index < max_index; index++) { + if (modifiers_state[index]) { + modifiers_state[index] = 0; + qemu_input_event_send_key_number(dcl->con, index, false); + } + } +} @end @@ -809,12 +828,11 @@ QemuCocoaView *cocoaView; */ @interface QemuCocoaAppController : NSObject #if (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) - <NSApplicationDelegate> + <NSWindowDelegate, NSApplicationDelegate> #endif { } - (void)startEmulationWithArgc:(int)argc argv:(char**)argv; -- (void)openPanelDidEnd:(NSOpenPanel *)sheet returnCode:(NSInteger)returnCode contextInfo:(void *)contextInfo; - (void)doToggleFullScreen:(id)sender; - (void)toggleFullScreen:(id)sender; - (void)showQEMUDoc:(id)sender; @@ -829,6 +847,7 @@ QemuCocoaView *cocoaView; - (void)powerDownQEMU:(id)sender; - (void)ejectDeviceMedia:(id)sender; - (void)changeDeviceMedia:(id)sender; +- (BOOL)verifyQuit; @end @implementation QemuCocoaAppController @@ -862,6 +881,7 @@ QemuCocoaView *cocoaView; #endif [normalWindow makeKeyAndOrderFront:self]; [normalWindow center]; + [normalWindow setDelegate: self]; stretch_video = false; /* Used for displaying pause on the screen */ @@ -895,29 +915,8 @@ QemuCocoaView *cocoaView; - (void)applicationDidFinishLaunching: (NSNotification *) note { COCOA_DEBUG("QemuCocoaAppController: applicationDidFinishLaunching\n"); - - // Display an open dialog box if no arguments were passed or - // if qemu was launched from the finder ( the Finder passes "-psn" ) - if( gArgc <= 1 || strncmp ((char *)gArgv[1], "-psn", 4) == 0) { - NSOpenPanel *op = [[NSOpenPanel alloc] init]; - [op setPrompt:@"Boot image"]; - [op setMessage:@"Select the disk image you want to boot.\n\nHit the \"Cancel\" button to quit"]; -#if (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) - [op setAllowedFileTypes:supportedImageFileTypes]; - [op beginSheetModalForWindow:normalWindow - completionHandler:^(NSInteger returnCode) - { [self openPanelDidEnd:op - returnCode:returnCode contextInfo:NULL ]; } ]; -#else - // Compatibility code for pre-10.6, using deprecated method - [op beginSheetForDirectory:nil file:nil types:filetypes - modalForWindow:normalWindow modalDelegate:self - didEndSelector:@selector(openPanelDidEnd:returnCode:contextInfo:) contextInfo:NULL]; -#endif - } else { - // or launch QEMU, with the global args - [self startEmulationWithArgc:gArgc argv:(char **)gArgv]; - } + // launch QEMU, with the global args + [self startEmulationWithArgc:gArgc argv:(char **)gArgv]; } - (void)applicationWillTerminate:(NSNotification *)aNotification @@ -933,43 +932,40 @@ QemuCocoaView *cocoaView; return YES; } -- (void)startEmulationWithArgc:(int)argc argv:(char**)argv +- (NSApplicationTerminateReply)applicationShouldTerminate: + (NSApplication *)sender { - COCOA_DEBUG("QemuCocoaAppController: startEmulationWithArgc\n"); - - int status; - status = qemu_main(argc, argv, *_NSGetEnviron()); - exit(status); + COCOA_DEBUG("QemuCocoaAppController: applicationShouldTerminate\n"); + return [self verifyQuit]; } -- (void)openPanelDidEnd:(NSOpenPanel *)sheet returnCode:(NSInteger)returnCode contextInfo:(void *)contextInfo +/* Called when the user clicks on a window's close button */ +- (BOOL)windowShouldClose:(id)sender { - COCOA_DEBUG("QemuCocoaAppController: openPanelDidEnd\n"); - - /* The NSFileHandlingPanelOKButton/NSFileHandlingPanelCancelButton values for - * returnCode strictly only apply for the 10.6-and-up beginSheetModalForWindow - * API. For the legacy pre-10.6 beginSheetForDirectory API they are NSOKButton - * and NSCancelButton. However conveniently the values are the same. - * We use the non-legacy names because the others are deprecated in OSX 10.10. + COCOA_DEBUG("QemuCocoaAppController: windowShouldClose\n"); + [NSApp terminate: sender]; + /* If the user allows the application to quit then the call to + * NSApp terminate will never return. If we get here then the user + * cancelled the quit, so we should return NO to not permit the + * closing of this window. */ - if (returnCode == NSFileHandlingPanelCancelButton) { - exit(0); - } else if (returnCode == NSFileHandlingPanelOKButton) { - char *img = (char*)[ [ [ sheet URL ] path ] cStringUsingEncoding:NSASCIIStringEncoding]; - - char **argv = g_new(char *, 4); - - [sheet close]; + return NO; +} - argv[0] = g_strdup(gArgv[0]); - argv[1] = g_strdup("-hda"); - argv[2] = g_strdup(img); - argv[3] = NULL; +/* Called when QEMU goes into the background */ +- (void) applicationWillResignActive: (NSNotification *)aNotification +{ + COCOA_DEBUG("QemuCocoaAppController: applicationWillResignActive\n"); + [cocoaView raiseAllKeys]; +} - // printf("Using argc %d argv %s -hda %s\n", 3, gArgv[0], img); +- (void)startEmulationWithArgc:(int)argc argv:(char**)argv +{ + COCOA_DEBUG("QemuCocoaAppController: startEmulationWithArgc\n"); - [self startEmulationWithArgc:3 argv:(char**)argv]; - } + int status; + status = qemu_main(argc, argv, *_NSGetEnviron()); + exit(status); } /* We abstract the method called by the Enter Fullscreen menu item @@ -1125,6 +1121,21 @@ QemuCocoaView *cocoaView; } } +/* Verifies if the user really wants to quit */ +- (BOOL)verifyQuit +{ + NSAlert *alert = [NSAlert new]; + [alert autorelease]; + [alert setMessageText: @"Are you sure you want to quit QEMU?"]; + [alert addButtonWithTitle: @"Cancel"]; + [alert addButtonWithTitle: @"Quit"]; + if([alert runModal] == NSAlertSecondButtonReturn) { + return YES; + } else { + return NO; + } +} + @end @@ -580,6 +580,7 @@ static const RunStateTransition runstate_transitions_def[] = { { RUN_STATE_INMIGRATE, RUN_STATE_SUSPENDED }, { RUN_STATE_INMIGRATE, RUN_STATE_WATCHDOG }, { RUN_STATE_INMIGRATE, RUN_STATE_GUEST_PANICKED }, + { RUN_STATE_INMIGRATE, RUN_STATE_FINISH_MIGRATE }, { RUN_STATE_INTERNAL_ERROR, RUN_STATE_PAUSED }, { RUN_STATE_INTERNAL_ERROR, RUN_STATE_FINISH_MIGRATE }, |