diff options
39 files changed, 2537 insertions, 247 deletions
diff --git a/QMP/qmp-events.txt b/QMP/qmp-events.txt index 987c575..b2698e4 100644 --- a/QMP/qmp-events.txt +++ b/QMP/qmp-events.txt @@ -118,6 +118,24 @@ Example: "action": "stop" }, "timestamp": { "seconds": 1265044230, "microseconds": 450486 } } +BLOCK_JOB_READY +--------------- + +Emitted when a block job is ready to complete. + +Data: + +- "device": device name (json-string) + +Example: + +{ "event": "BLOCK_JOB_READY", + "data": { "device": "ide0-hd1" }, + "timestamp": { "seconds": 1265044230, "microseconds": 450486 } } + +Note: The "ready to complete" status is always reset by a BLOCK_JOB_ERROR +event. + DEVICE_TRAY_MOVED ----------------- @@ -96,6 +96,7 @@ class QEMUMonitorProtocol: @raise QMPCapabilitiesError if fails to negotiate capabilities """ self.__sock, _ = self.__sock.accept() + self.__sockfile = self.__sock.makefile() return self.__negotiate_capabilities() def cmd_obj(self, qmp_cmd): @@ -135,6 +136,26 @@ class QEMUMonitorProtocol: raise Exception(ret['error']['desc']) return ret['return'] + def pull_event(self, wait=False): + """ + Get and delete the first available QMP event. + + @param wait: block until an event is available (bool) + """ + self.__sock.setblocking(0) + try: + self.__json_read() + except socket.error, err: + if err[0] == errno.EAGAIN: + # No data available + pass + self.__sock.setblocking(1) + if not self.__events and wait: + self.__json_read(only_event=True) + event = self.__events[0] + del self.__events[0] + return event + def get_events(self, wait=False): """ Get a list of available QMP events. 
@@ -387,7 +387,8 @@ int bdrv_create(BlockDriver *drv, const char* filename, }; if (!drv->bdrv_create) { - return -ENOTSUP; + ret = -ENOTSUP; + goto out; } if (qemu_in_coroutine()) { @@ -402,8 +403,9 @@ int bdrv_create(BlockDriver *drv, const char* filename, } ret = cco.ret; - g_free(cco.filename); +out: + g_free(cco.filename); return ret; } @@ -742,6 +744,42 @@ int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags) return 0; } +int bdrv_open_backing_file(BlockDriverState *bs) +{ + char backing_filename[PATH_MAX]; + int back_flags, ret; + BlockDriver *back_drv = NULL; + + if (bs->backing_hd != NULL) { + return 0; + } + + bs->open_flags &= ~BDRV_O_NO_BACKING; + if (bs->backing_file[0] == '\0') { + return 0; + } + + bs->backing_hd = bdrv_new(""); + bdrv_get_full_backing_filename(bs, backing_filename, + sizeof(backing_filename)); + + if (bs->backing_format[0] != '\0') { + back_drv = bdrv_find_format(bs->backing_format); + } + + /* backing files always opened read-only */ + back_flags = bs->open_flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT); + + ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv); + if (ret < 0) { + bdrv_delete(bs->backing_hd); + bs->backing_hd = NULL; + bs->open_flags |= BDRV_O_NO_BACKING; + return ret; + } + return 0; +} + /* * Opens a disk image (raw, qcow2, vmdk, ...) 
*/ @@ -829,24 +867,8 @@ int bdrv_open(BlockDriverState *bs, const char *filename, int flags, } /* If there is a backing file, use it */ - if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') { - char backing_filename[PATH_MAX]; - int back_flags; - BlockDriver *back_drv = NULL; - - bs->backing_hd = bdrv_new(""); - bdrv_get_full_backing_filename(bs, backing_filename, - sizeof(backing_filename)); - - if (bs->backing_format[0] != '\0') { - back_drv = bdrv_find_format(bs->backing_format); - } - - /* backing files always opened read-only */ - back_flags = - flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); - - ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv); + if ((flags & BDRV_O_NO_BACKING) == 0) { + ret = bdrv_open_backing_file(bs); if (ret < 0) { bdrv_close(bs); return ret; @@ -2378,7 +2400,7 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, } if (bs->dirty_bitmap) { - set_dirty_bitmap(bs, sector_num, nb_sectors, 1); + bdrv_set_dirty(bs, sector_num, nb_sectors); } if (bs->wr_highest_sector < sector_num + nb_sectors - 1) { @@ -2806,76 +2828,82 @@ int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top, return 0; } -BlockInfoList *qmp_query_block(Error **errp) +BlockInfo *bdrv_query_info(BlockDriverState *bs) { - BlockInfoList *head = NULL, *cur_item = NULL; - BlockDriverState *bs; + BlockInfo *info = g_malloc0(sizeof(*info)); + info->device = g_strdup(bs->device_name); + info->type = g_strdup("unknown"); + info->locked = bdrv_dev_is_medium_locked(bs); + info->removable = bdrv_dev_has_removable_media(bs); - QTAILQ_FOREACH(bs, &bdrv_states, list) { - BlockInfoList *info = g_malloc0(sizeof(*info)); + if (bdrv_dev_has_removable_media(bs)) { + info->has_tray_open = true; + info->tray_open = bdrv_dev_is_tray_open(bs); + } + + if (bdrv_iostatus_is_enabled(bs)) { + info->has_io_status = true; + info->io_status = bs->iostatus; + } - info->value = g_malloc0(sizeof(*info->value)); - info->value->device 
= g_strdup(bs->device_name); - info->value->type = g_strdup("unknown"); - info->value->locked = bdrv_dev_is_medium_locked(bs); - info->value->removable = bdrv_dev_has_removable_media(bs); + if (bs->dirty_bitmap) { + info->has_dirty = true; + info->dirty = g_malloc0(sizeof(*info->dirty)); + info->dirty->count = bdrv_get_dirty_count(bs) * + BDRV_SECTORS_PER_DIRTY_CHUNK * BDRV_SECTOR_SIZE; + } - if (bdrv_dev_has_removable_media(bs)) { - info->value->has_tray_open = true; - info->value->tray_open = bdrv_dev_is_tray_open(bs); + if (bs->drv) { + info->has_inserted = true; + info->inserted = g_malloc0(sizeof(*info->inserted)); + info->inserted->file = g_strdup(bs->filename); + info->inserted->ro = bs->read_only; + info->inserted->drv = g_strdup(bs->drv->format_name); + info->inserted->encrypted = bs->encrypted; + info->inserted->encryption_key_missing = bdrv_key_required(bs); + + if (bs->backing_file[0]) { + info->inserted->has_backing_file = true; + info->inserted->backing_file = g_strdup(bs->backing_file); } - if (bdrv_iostatus_is_enabled(bs)) { - info->value->has_io_status = true; - info->value->io_status = bs->iostatus; + info->inserted->backing_file_depth = bdrv_get_backing_file_depth(bs); + + if (bs->io_limits_enabled) { + info->inserted->bps = + bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]; + info->inserted->bps_rd = + bs->io_limits.bps[BLOCK_IO_LIMIT_READ]; + info->inserted->bps_wr = + bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE]; + info->inserted->iops = + bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]; + info->inserted->iops_rd = + bs->io_limits.iops[BLOCK_IO_LIMIT_READ]; + info->inserted->iops_wr = + bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE]; } + } + return info; +} - if (bs->drv) { - info->value->has_inserted = true; - info->value->inserted = g_malloc0(sizeof(*info->value->inserted)); - info->value->inserted->file = g_strdup(bs->filename); - info->value->inserted->ro = bs->read_only; - info->value->inserted->drv = g_strdup(bs->drv->format_name); - 
info->value->inserted->encrypted = bs->encrypted; - info->value->inserted->encryption_key_missing = bdrv_key_required(bs); - if (bs->backing_file[0]) { - info->value->inserted->has_backing_file = true; - info->value->inserted->backing_file = g_strdup(bs->backing_file); - } +BlockInfoList *qmp_query_block(Error **errp) +{ + BlockInfoList *head = NULL, **p_next = &head; + BlockDriverState *bs; - info->value->inserted->backing_file_depth = - bdrv_get_backing_file_depth(bs); - - if (bs->io_limits_enabled) { - info->value->inserted->bps = - bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]; - info->value->inserted->bps_rd = - bs->io_limits.bps[BLOCK_IO_LIMIT_READ]; - info->value->inserted->bps_wr = - bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE]; - info->value->inserted->iops = - bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]; - info->value->inserted->iops_rd = - bs->io_limits.iops[BLOCK_IO_LIMIT_READ]; - info->value->inserted->iops_wr = - bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE]; - } - } + QTAILQ_FOREACH(bs, &bdrv_states, list) { + BlockInfoList *info = g_malloc0(sizeof(*info)); + info->value = bdrv_query_info(bs); - /* XXX: waiting for the qapi to support GSList */ - if (!cur_item) { - head = cur_item = info; - } else { - cur_item->next = info; - cur_item = info; - } + *p_next = info; + p_next = &info->next; } return head; } -/* Consider exposing this as a full fledged QMP command */ -static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp) +BlockStats *bdrv_query_stats(const BlockDriverState *bs) { BlockStats *s; @@ -2899,7 +2927,7 @@ static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp) if (bs->file) { s->has_parent = true; - s->parent = qmp_query_blockstat(bs->file, NULL); + s->parent = bdrv_query_stats(bs->file); } return s; @@ -2907,20 +2935,15 @@ static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp) BlockStatsList *qmp_query_blockstats(Error **errp) { - BlockStatsList *head = NULL, *cur_item = NULL; + 
BlockStatsList *head = NULL, **p_next = &head; BlockDriverState *bs; QTAILQ_FOREACH(bs, &bdrv_states, list) { BlockStatsList *info = g_malloc0(sizeof(*info)); - info->value = qmp_query_blockstat(bs, NULL); + info->value = bdrv_query_stats(bs); - /* XXX: waiting for the qapi to support GSList */ - if (!cur_item) { - head = cur_item = info; - } else { - cur_item->next = info; - cur_item = info; - } + *p_next = info; + p_next = &info->next; } return head; @@ -2953,9 +2976,7 @@ int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num, if (bdrv_check_request(bs, sector_num, nb_sectors)) return -EIO; - if (bs->dirty_bitmap) { - set_dirty_bitmap(bs, sector_num, nb_sectors, 1); - } + assert(!bs->dirty_bitmap); return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors); } @@ -3132,22 +3153,70 @@ int bdrv_snapshot_load_tmp(BlockDriverState *bs, return -ENOTSUP; } +/* backing_file can either be relative, or absolute, or a protocol. If it is + * relative, it must be relative to the chain. So, passing in bs->filename + * from a BDS as backing_file should not be done, as that may be relative to + * the CWD rather than the chain. 
*/ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, const char *backing_file) { - if (!bs->drv) { + char *filename_full = NULL; + char *backing_file_full = NULL; + char *filename_tmp = NULL; + int is_protocol = 0; + BlockDriverState *curr_bs = NULL; + BlockDriverState *retval = NULL; + + if (!bs || !bs->drv || !backing_file) { return NULL; } - if (bs->backing_hd) { - if (strcmp(bs->backing_file, backing_file) == 0) { - return bs->backing_hd; + filename_full = g_malloc(PATH_MAX); + backing_file_full = g_malloc(PATH_MAX); + filename_tmp = g_malloc(PATH_MAX); + + is_protocol = path_has_protocol(backing_file); + + for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) { + + /* If either of the filename paths is actually a protocol, then + * compare unmodified paths; otherwise make paths relative */ + if (is_protocol || path_has_protocol(curr_bs->backing_file)) { + if (strcmp(backing_file, curr_bs->backing_file) == 0) { + retval = curr_bs->backing_hd; + break; + } } else { - return bdrv_find_backing_image(bs->backing_hd, backing_file); + /* If not an absolute filename path, make it relative to the current + * image's filename path */ + path_combine(filename_tmp, PATH_MAX, curr_bs->filename, + backing_file); + + /* We are going to compare absolute pathnames */ + if (!realpath(filename_tmp, filename_full)) { + continue; + } + + /* We need to make sure the backing filename we are comparing against + * is relative to the current image filename (or absolute) */ + path_combine(filename_tmp, PATH_MAX, curr_bs->filename, + curr_bs->backing_file); + + if (!realpath(filename_tmp, backing_file_full)) { + continue; + } + + if (strcmp(backing_file_full, filename_full) == 0) { + retval = curr_bs->backing_hd; + break; + } } } - return NULL; + g_free(filename_full); + g_free(backing_file_full); + g_free(filename_tmp); + return retval; } int bdrv_get_backing_file_depth(BlockDriverState *bs) @@ -4214,13 +4283,54 @@ int bdrv_get_dirty(BlockDriverState *bs, 
int64_t sector) if (bs->dirty_bitmap && (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) { - return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] & - (1UL << (chunk % (sizeof(unsigned long) * 8)))); + return !!(bs->dirty_bitmap[chunk / BITS_PER_LONG] & + (1UL << (chunk % BITS_PER_LONG))); } else { return 0; } } +int64_t bdrv_get_next_dirty(BlockDriverState *bs, int64_t sector) +{ + int64_t chunk; + int bit, elem; + + /* Avoid an infinite loop. */ + assert(bs->dirty_count > 0); + + sector = (sector | (BDRV_SECTORS_PER_DIRTY_CHUNK - 1)) + 1; + chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK; + + QEMU_BUILD_BUG_ON(sizeof(bs->dirty_bitmap[0]) * 8 != BITS_PER_LONG); + elem = chunk / BITS_PER_LONG; + bit = chunk % BITS_PER_LONG; + for (;;) { + if (sector >= bs->total_sectors) { + sector = 0; + bit = elem = 0; + } + if (bit == 0 && bs->dirty_bitmap[elem] == 0) { + sector += BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG; + elem++; + } else { + if (bs->dirty_bitmap[elem] & (1UL << bit)) { + return sector; + } + sector += BDRV_SECTORS_PER_DIRTY_CHUNK; + if (++bit == BITS_PER_LONG) { + bit = 0; + elem++; + } + } + } +} + +void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, + int nr_sectors) +{ + set_dirty_bitmap(bs, cur_sector, nr_sectors, 1); +} + void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors) { @@ -4268,6 +4378,9 @@ void bdrv_iostatus_reset(BlockDriverState *bs) { if (bdrv_iostatus_is_enabled(bs)) { bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK; + if (bs->job) { + block_job_iostatus_reset(bs->job); + } } } @@ -133,6 +133,7 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top); void bdrv_delete(BlockDriverState *bs); int bdrv_parse_cache_flags(const char *mode, int *flags); int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags); +int bdrv_open_backing_file(BlockDriverState *bs); int bdrv_open(BlockDriverState *bs, const char *filename, int flags, BlockDriver *drv); 
BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, @@ -314,6 +315,8 @@ void bdrv_get_backing_filename(BlockDriverState *bs, char *filename, int filename_size); void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz); +BlockInfo *bdrv_query_info(BlockDriverState *s); +BlockStats *bdrv_query_stats(const BlockDriverState *bs); int bdrv_can_snapshot(BlockDriverState *bs); int bdrv_is_snapshot(BlockDriverState *bs); BlockDriverState *bdrv_snapshots(void); @@ -351,8 +354,9 @@ void *qemu_blockalign(BlockDriverState *bs, size_t size); void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable); int bdrv_get_dirty(BlockDriverState *bs, int64_t sector); -void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, - int nr_sectors); +void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors); +void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors); +int64_t bdrv_get_next_dirty(BlockDriverState *bs, int64_t sector); int64_t bdrv_get_dirty_count(BlockDriverState *bs); void bdrv_enable_copy_on_read(BlockDriverState *bs); diff --git a/block/Makefile.objs b/block/Makefile.objs index 554f429..806e526 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -12,3 +12,4 @@ block-obj-$(CONFIG_GLUSTERFS) += gluster.o common-obj-y += stream.o common-obj-y += commit.o +common-obj-y += mirror.o diff --git a/block/commit.c b/block/commit.c index 733c914..fae7958 100644 --- a/block/commit.c +++ b/block/commit.c @@ -160,7 +160,7 @@ exit_restore_reopen: bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL); } - block_job_complete(&s->common, ret); + block_job_completed(&s->common, ret); } static void commit_set_speed(BlockJob *job, int64_t speed, Error **errp) @@ -211,15 +211,6 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base, return; } - /* top and base may be valid, but let's make sure that base is reachable - * from top */ - if (bdrv_find_backing_image(top, base->filename) != base) 
{ - error_setg(errp, - "Base (%s) is not reachable from top (%s)", - base->filename, top->filename); - return; - } - overlay_bs = bdrv_find_overlay(bs, top); if (overlay_bs == NULL) { diff --git a/block/mirror.c b/block/mirror.c new file mode 100644 index 0000000..d6618a4 --- /dev/null +++ b/block/mirror.c @@ -0,0 +1,322 @@ +/* + * Image mirroring + * + * Copyright Red Hat, Inc. 2012 + * + * Authors: + * Paolo Bonzini <pbonzini@redhat.com> + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include "trace.h" +#include "blockjob.h" +#include "block_int.h" +#include "qemu/ratelimit.h" + +enum { + /* + * Size of data buffer for populating the image file. This should be large + * enough to process multiple clusters in a single call, so that populating + * contiguous regions of the image is efficient. + */ + BLOCK_SIZE = 512 * BDRV_SECTORS_PER_DIRTY_CHUNK, /* in bytes */ +}; + +#define SLICE_TIME 100000000ULL /* ns */ + +typedef struct MirrorBlockJob { + BlockJob common; + RateLimit limit; + BlockDriverState *target; + MirrorSyncMode mode; + BlockdevOnError on_source_error, on_target_error; + bool synced; + bool should_complete; + int64_t sector_num; + uint8_t *buf; +} MirrorBlockJob; + +static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read, + int error) +{ + s->synced = false; + if (read) { + return block_job_error_action(&s->common, s->common.bs, + s->on_source_error, true, error); + } else { + return block_job_error_action(&s->common, s->target, + s->on_target_error, false, error); + } +} + +static int coroutine_fn mirror_iteration(MirrorBlockJob *s, + BlockErrorAction *p_action) +{ + BlockDriverState *source = s->common.bs; + BlockDriverState *target = s->target; + QEMUIOVector qiov; + int ret, nb_sectors; + int64_t end; + struct iovec iov; + + end = s->common.len >> BDRV_SECTOR_BITS; + s->sector_num = bdrv_get_next_dirty(source, s->sector_num); + 
nb_sectors = MIN(BDRV_SECTORS_PER_DIRTY_CHUNK, end - s->sector_num); + bdrv_reset_dirty(source, s->sector_num, nb_sectors); + + /* Copy the dirty cluster. */ + iov.iov_base = s->buf; + iov.iov_len = nb_sectors * 512; + qemu_iovec_init_external(&qiov, &iov, 1); + + trace_mirror_one_iteration(s, s->sector_num, nb_sectors); + ret = bdrv_co_readv(source, s->sector_num, nb_sectors, &qiov); + if (ret < 0) { + *p_action = mirror_error_action(s, true, -ret); + goto fail; + } + ret = bdrv_co_writev(target, s->sector_num, nb_sectors, &qiov); + if (ret < 0) { + *p_action = mirror_error_action(s, false, -ret); + s->synced = false; + goto fail; + } + return 0; + +fail: + /* Try again later. */ + bdrv_set_dirty(source, s->sector_num, nb_sectors); + return ret; +} + +static void coroutine_fn mirror_run(void *opaque) +{ + MirrorBlockJob *s = opaque; + BlockDriverState *bs = s->common.bs; + int64_t sector_num, end; + int ret = 0; + int n; + + if (block_job_is_cancelled(&s->common)) { + goto immediate_exit; + } + + s->common.len = bdrv_getlength(bs); + if (s->common.len < 0) { + block_job_completed(&s->common, s->common.len); + return; + } + + end = s->common.len >> BDRV_SECTOR_BITS; + s->buf = qemu_blockalign(bs, BLOCK_SIZE); + + if (s->mode != MIRROR_SYNC_MODE_NONE) { + /* First part, loop on the sectors and initialize the dirty bitmap. */ + BlockDriverState *base; + base = s->mode == MIRROR_SYNC_MODE_FULL ? 
NULL : bs->backing_hd; + for (sector_num = 0; sector_num < end; ) { + int64_t next = (sector_num | (BDRV_SECTORS_PER_DIRTY_CHUNK - 1)) + 1; + ret = bdrv_co_is_allocated_above(bs, base, + sector_num, next - sector_num, &n); + + if (ret < 0) { + goto immediate_exit; + } + + assert(n > 0); + if (ret == 1) { + bdrv_set_dirty(bs, sector_num, n); + sector_num = next; + } else { + sector_num += n; + } + } + } + + s->sector_num = -1; + for (;;) { + uint64_t delay_ns; + int64_t cnt; + bool should_complete; + + cnt = bdrv_get_dirty_count(bs); + if (cnt != 0) { + BlockErrorAction action = BDRV_ACTION_REPORT; + ret = mirror_iteration(s, &action); + if (ret < 0 && action == BDRV_ACTION_REPORT) { + goto immediate_exit; + } + cnt = bdrv_get_dirty_count(bs); + } + + should_complete = false; + if (cnt == 0) { + trace_mirror_before_flush(s); + ret = bdrv_flush(s->target); + if (ret < 0) { + if (mirror_error_action(s, false, -ret) == BDRV_ACTION_REPORT) { + goto immediate_exit; + } + } else { + /* We're out of the streaming phase. From now on, if the job + * is cancelled we will actually complete all pending I/O and + * report completion. This way, block-job-cancel will leave + * the target in a consistent state. + */ + s->common.offset = end * BDRV_SECTOR_SIZE; + if (!s->synced) { + block_job_ready(&s->common); + s->synced = true; + } + + should_complete = s->should_complete || + block_job_is_cancelled(&s->common); + cnt = bdrv_get_dirty_count(bs); + } + } + + if (cnt == 0 && should_complete) { + /* The dirty bitmap is not updated while operations are pending. + * If we're about to exit, wait for pending operations before + * calling bdrv_get_dirty_count(bs), or we may exit while the + * source has dirty data to copy! + * + * Note that I/O can be submitted by the guest while + * mirror_populate runs. 
+ */ + trace_mirror_before_drain(s, cnt); + bdrv_drain_all(); + cnt = bdrv_get_dirty_count(bs); + } + + ret = 0; + trace_mirror_before_sleep(s, cnt, s->synced); + if (!s->synced) { + /* Publish progress */ + s->common.offset = end * BDRV_SECTOR_SIZE - cnt * BLOCK_SIZE; + + if (s->common.speed) { + delay_ns = ratelimit_calculate_delay(&s->limit, BDRV_SECTORS_PER_DIRTY_CHUNK); + } else { + delay_ns = 0; + } + + /* Note that even when no rate limit is applied we need to yield + * with no pending I/O here so that qemu_aio_flush() returns. + */ + block_job_sleep_ns(&s->common, rt_clock, delay_ns); + if (block_job_is_cancelled(&s->common)) { + break; + } + } else if (!should_complete) { + delay_ns = (cnt == 0 ? SLICE_TIME : 0); + block_job_sleep_ns(&s->common, rt_clock, delay_ns); + } else if (cnt == 0) { + /* The two disks are in sync. Exit and report successful + * completion. + */ + assert(QLIST_EMPTY(&bs->tracked_requests)); + s->common.cancelled = false; + break; + } + } + +immediate_exit: + g_free(s->buf); + bdrv_set_dirty_tracking(bs, false); + bdrv_iostatus_disable(s->target); + if (s->should_complete && ret == 0) { + if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) { + bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL); + } + bdrv_swap(s->target, s->common.bs); + } + bdrv_close(s->target); + bdrv_delete(s->target); + block_job_completed(&s->common, ret); +} + +static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp) +{ + MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); + + if (speed < 0) { + error_set(errp, QERR_INVALID_PARAMETER, "speed"); + return; + } + ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME); +} + +static void mirror_iostatus_reset(BlockJob *job) +{ + MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); + + bdrv_iostatus_reset(s->target); +} + +static void mirror_complete(BlockJob *job, Error **errp) +{ + MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); + 
int ret; + + ret = bdrv_open_backing_file(s->target); + if (ret < 0) { + char backing_filename[PATH_MAX]; + bdrv_get_full_backing_filename(s->target, backing_filename, + sizeof(backing_filename)); + error_set(errp, QERR_OPEN_FILE_FAILED, backing_filename); + return; + } + if (!s->synced) { + error_set(errp, QERR_BLOCK_JOB_NOT_READY, job->bs->device_name); + return; + } + + s->should_complete = true; + block_job_resume(job); +} + +static BlockJobType mirror_job_type = { + .instance_size = sizeof(MirrorBlockJob), + .job_type = "mirror", + .set_speed = mirror_set_speed, + .iostatus_reset= mirror_iostatus_reset, + .complete = mirror_complete, +}; + +void mirror_start(BlockDriverState *bs, BlockDriverState *target, + int64_t speed, MirrorSyncMode mode, + BlockdevOnError on_source_error, + BlockdevOnError on_target_error, + BlockDriverCompletionFunc *cb, + void *opaque, Error **errp) +{ + MirrorBlockJob *s; + + if ((on_source_error == BLOCKDEV_ON_ERROR_STOP || + on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) && + !bdrv_iostatus_is_enabled(bs)) { + error_set(errp, QERR_INVALID_PARAMETER, "on-source-error"); + return; + } + + s = block_job_create(&mirror_job_type, bs, speed, cb, opaque, errp); + if (!s) { + return; + } + + s->on_source_error = on_source_error; + s->on_target_error = on_target_error; + s->target = target; + s->mode = mode; + bdrv_set_dirty_tracking(bs, true); + bdrv_set_enable_write_cache(s->target, true); + bdrv_set_on_error(s->target, on_target_error, on_target_error); + bdrv_iostatus_enable(s->target); + s->common.co = qemu_coroutine_create(mirror_run); + trace_mirror_start(bs, s, s->common.co, opaque); + qemu_coroutine_enter(s->common.co, s); +} diff --git a/block/stream.c b/block/stream.c index 7926652..0c0fc7a 100644 --- a/block/stream.c +++ b/block/stream.c @@ -86,7 +86,7 @@ static void coroutine_fn stream_run(void *opaque) s->common.len = bdrv_getlength(bs); if (s->common.len < 0) { - block_job_complete(&s->common, s->common.len); + 
block_job_completed(&s->common, s->common.len); return; } @@ -184,7 +184,7 @@ wait: } qemu_vfree(buf); - block_job_complete(&s->common, ret); + block_job_completed(&s->common, ret); } static void stream_set_speed(BlockJob *job, int64_t speed, Error **errp) diff --git a/block_int.h b/block_int.h index cedabbd..9deedb8 100644 --- a/block_int.h +++ b/block_int.h @@ -333,4 +333,28 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base, BlockdevOnError on_error, BlockDriverCompletionFunc *cb, void *opaque, Error **errp); +/* + * mirror_start: + * @bs: Block device to operate on. + * @target: Block device to write to. + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @mode: Whether to collapse all images in the chain to the target. + * @on_source_error: The action to take upon error reading from the source. + * @on_target_error: The action to take upon error writing to the target. + * @cb: Completion function for the job. + * @opaque: Opaque pointer value passed to @cb. + * @errp: Error object. + * + * Start a mirroring operation on @bs. Clusters that are allocated + * in @bs will be written to @target until the job is cancelled or + * manually completed. At the end of a successful mirroring job, + * @bs will be switched to read from @target. 
+ */ +void mirror_start(BlockDriverState *bs, BlockDriverState *target, + int64_t speed, MirrorSyncMode mode, + BlockdevOnError on_source_error, + BlockdevOnError on_target_error, + BlockDriverCompletionFunc *cb, + void *opaque, Error **errp); + #endif /* BLOCK_INT_H */ @@ -1056,20 +1056,6 @@ void qmp_block_resize(const char *device, int64_t size, Error **errp) } } -static QObject *qobject_from_block_job(BlockJob *job) -{ - return qobject_from_jsonf("{ 'type': %s," - "'device': %s," - "'len': %" PRId64 "," - "'offset': %" PRId64 "," - "'speed': %" PRId64 " }", - job->job_type->job_type, - bdrv_get_device_name(job->bs), - job->len, - job->offset, - job->speed); -} - static void block_job_cb(void *opaque, int ret) { BlockDriverState *bs = opaque; @@ -1157,16 +1143,6 @@ void qmp_block_commit(const char *device, error_set(errp, QERR_DEVICE_NOT_FOUND, device); return; } - if (base && has_base) { - base_bs = bdrv_find_backing_image(bs, base); - } else { - base_bs = bdrv_find_base(bs); - } - - if (base_bs == NULL) { - error_set(errp, QERR_BASE_NOT_FOUND, base ? base : "NULL"); - return; - } /* default top_bs is the active layer */ top_bs = bs; @@ -1182,6 +1158,17 @@ void qmp_block_commit(const char *device, return; } + if (has_base && base) { + base_bs = bdrv_find_backing_image(top_bs, base); + } else { + base_bs = bdrv_find_base(top_bs); + } + + if (base_bs == NULL) { + error_set(errp, QERR_BASE_NOT_FOUND, base ? 
base : "NULL"); + return; + } + commit_start(bs, base_bs, top_bs, speed, on_error, block_job_cb, bs, &local_err); if (local_err != NULL) { @@ -1194,6 +1181,140 @@ void qmp_block_commit(const char *device, drive_get_ref(drive_get_by_blockdev(bs)); } +void qmp_drive_mirror(const char *device, const char *target, + bool has_format, const char *format, + enum MirrorSyncMode sync, + bool has_mode, enum NewImageMode mode, + bool has_speed, int64_t speed, + bool has_on_source_error, BlockdevOnError on_source_error, + bool has_on_target_error, BlockdevOnError on_target_error, + Error **errp) +{ + BlockDriverInfo bdi; + BlockDriverState *bs; + BlockDriverState *source, *target_bs; + BlockDriver *proto_drv; + BlockDriver *drv = NULL; + Error *local_err = NULL; + int flags; + uint64_t size; + int ret; + + if (!has_speed) { + speed = 0; + } + if (!has_on_source_error) { + on_source_error = BLOCKDEV_ON_ERROR_REPORT; + } + if (!has_on_target_error) { + on_target_error = BLOCKDEV_ON_ERROR_REPORT; + } + if (!has_mode) { + mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; + } + + bs = bdrv_find(device); + if (!bs) { + error_set(errp, QERR_DEVICE_NOT_FOUND, device); + return; + } + + if (!bdrv_is_inserted(bs)) { + error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device); + return; + } + + if (!has_format) { + format = mode == NEW_IMAGE_MODE_EXISTING ? 
NULL : bs->drv->format_name; + } + if (format) { + drv = bdrv_find_format(format); + if (!drv) { + error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); + return; + } + } + + if (bdrv_in_use(bs)) { + error_set(errp, QERR_DEVICE_IN_USE, device); + return; + } + + flags = bs->open_flags | BDRV_O_RDWR; + source = bs->backing_hd; + if (!source && sync == MIRROR_SYNC_MODE_TOP) { + sync = MIRROR_SYNC_MODE_FULL; + } + + proto_drv = bdrv_find_protocol(target); + if (!proto_drv) { + error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); + return; + } + + if (sync == MIRROR_SYNC_MODE_FULL && mode != NEW_IMAGE_MODE_EXISTING) { + /* create new image w/o backing file */ + assert(format && drv); + bdrv_get_geometry(bs, &size); + size *= 512; + ret = bdrv_img_create(target, format, + NULL, NULL, NULL, size, flags); + } else { + switch (mode) { + case NEW_IMAGE_MODE_EXISTING: + ret = 0; + break; + case NEW_IMAGE_MODE_ABSOLUTE_PATHS: + /* create new image with backing file */ + ret = bdrv_img_create(target, format, + source->filename, + source->drv->format_name, + NULL, -1, flags); + break; + default: + abort(); + } + } + + if (ret) { + error_set(errp, QERR_OPEN_FILE_FAILED, target); + return; + } + + target_bs = bdrv_new(""); + ret = bdrv_open(target_bs, target, flags | BDRV_O_NO_BACKING, drv); + + if (ret < 0) { + bdrv_delete(target_bs); + error_set(errp, QERR_OPEN_FILE_FAILED, target); + return; + } + + /* We need a backing file if we will copy parts of a cluster. 
*/ + if (bdrv_get_info(target_bs, &bdi) >= 0 && bdi.cluster_size != 0 && + bdi.cluster_size >= BDRV_SECTORS_PER_DIRTY_CHUNK * 512) { + ret = bdrv_open_backing_file(target_bs); + if (ret < 0) { + bdrv_delete(target_bs); + error_set(errp, QERR_OPEN_FILE_FAILED, target); + return; + } + } + + mirror_start(bs, target_bs, speed, sync, on_source_error, on_target_error, + block_job_cb, bs, &local_err); + if (local_err != NULL) { + bdrv_delete(target_bs); + error_propagate(errp, local_err); + return; + } + + /* Grab a reference so hotplug does not delete the BlockDriverState from + * underneath us. + */ + drive_get_ref(drive_get_by_blockdev(bs)); +} + static BlockJob *find_block_job(const char *device) { BlockDriverState *bs; @@ -1265,6 +1386,19 @@ void qmp_block_job_resume(const char *device, Error **errp) block_job_resume(job); } +void qmp_block_job_complete(const char *device, Error **errp) +{ + BlockJob *job = find_block_job(device); + + if (!job) { + error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device); + return; + } + + trace_qmp_block_job_complete(job); + block_job_complete(job, errp); +} + static void do_qmp_query_block_jobs_one(void *opaque, BlockDriverState *bs) { BlockJobInfoList **prev = opaque; @@ -71,7 +71,7 @@ void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs, return job; } -void block_job_complete(BlockJob *job, int ret) +void block_job_completed(BlockJob *job, int ret) { BlockDriverState *bs = job->bs; @@ -99,6 +99,16 @@ void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp) job->speed = speed; } +void block_job_complete(BlockJob *job, Error **errp) +{ + if (job->paused || job->cancelled || !job->job_type->complete) { + error_set(errp, QERR_BLOCK_JOB_NOT_READY, job->bs->device_name); + return; + } + + job->job_type->complete(job, errp); +} + void block_job_pause(BlockJob *job) { job->paused = true; @@ -132,6 +142,9 @@ bool block_job_is_cancelled(BlockJob *job) void block_job_iostatus_reset(BlockJob *job) { job->iostatus 
= BLOCK_DEVICE_IO_STATUS_OK; + if (job->job_type->iostatus_reset) { + job->job_type->iostatus_reset(job); + } } struct BlockCancelData { @@ -215,6 +228,27 @@ static void block_job_iostatus_set_err(BlockJob *job, int error) } +QObject *qobject_from_block_job(BlockJob *job) +{ + return qobject_from_jsonf("{ 'type': %s," + "'device': %s," + "'len': %" PRId64 "," + "'offset': %" PRId64 "," + "'speed': %" PRId64 " }", + job->job_type->job_type, + bdrv_get_device_name(job->bs), + job->len, + job->offset, + job->speed); +} + +void block_job_ready(BlockJob *job) +{ + QObject *data = qobject_from_block_job(job); + monitor_protocol_event(QEVENT_BLOCK_JOB_READY, data); + qobject_decref(data); +} + BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs, BlockdevOnError on_err, int is_read, int error) @@ -41,6 +41,15 @@ typedef struct BlockJobType { /** Optional callback for job types that support setting a speed limit */ void (*set_speed)(BlockJob *job, int64_t speed, Error **errp); + + /** Optional callback for job types that need to forward I/O status reset */ + void (*iostatus_reset)(BlockJob *job); + + /** + * Optional callback for job types whose completion must be triggered + * manually. + */ + void (*complete)(BlockJob *job, Error **errp); } BlockJobType; /** @@ -135,14 +144,14 @@ void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs, void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns); /** - * block_job_complete: + * block_job_completed: * @job: The job being completed. * @ret: The status code. * * Call the completion function that was registered at creation time, and * free @job. */ -void block_job_complete(BlockJob *job, int ret); +void block_job_completed(BlockJob *job, int ret); /** * block_job_set_speed: @@ -164,6 +173,15 @@ void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp); void block_job_cancel(BlockJob *job); /** + * block_job_complete: + * @job: The job to be completed. 
+ * @errp: Error object. + * + * Asynchronously complete the specified job. + */ +void block_job_complete(BlockJob *job, Error **errp); + +/** * block_job_is_cancelled: * @job: The job being queried. * @@ -196,6 +214,22 @@ void block_job_pause(BlockJob *job); void block_job_resume(BlockJob *job); /** + * qobject_from_block_job: + * @job: The job whose information is requested. + * + * Return a QDict corresponding to @job's query-block-jobs entry. + */ +QObject *qobject_from_block_job(BlockJob *job); + +/** + * block_job_ready: + * @job: The job which is now ready to complete. + * + * Send a BLOCK_JOB_READY event for the specified job. + */ +void block_job_ready(BlockJob *job); + +/** * block_job_is_paused: * @job: The job being queried. * @@ -222,7 +256,8 @@ int block_job_cancel_sync(BlockJob *job); * block_job_iostatus_reset: * @job: The job whose I/O status should be reset. * - * Reset I/O status on @job. + * Reset I/O status on @job and on BlockDriverState objects it uses, + * other than job->bs. */ void block_job_iostatus_reset(BlockJob *job); diff --git a/hmp-commands.hx b/hmp-commands.hx index e0b537d..f916385 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -109,7 +109,22 @@ ETEXI STEXI @item block_job_cancel @findex block_job_cancel -Stop an active block streaming operation. +Stop an active background block operation (streaming, mirroring). +ETEXI + + { + .name = "block_job_complete", + .args_type = "device:B", + .params = "device", + .help = "stop an active background block operation", + .mhandler.cmd = hmp_block_job_complete, + }, + +STEXI +@item block_job_complete +@findex block_job_complete +Manually trigger completion of an active background block operation. +For mirroring, this will switch the device to the destination path. 
ETEXI { @@ -989,6 +1004,27 @@ Snapshot device, using snapshot file as target if provided ETEXI { + .name = "drive_mirror", + .args_type = "reuse:-n,full:-f,device:B,target:s,format:s?", + .params = "[-n] [-f] device target [format]", + .help = "initiates live storage\n\t\t\t" + "migration for a device. The device's contents are\n\t\t\t" + "copied to the new image file, including data that\n\t\t\t" + "is written after the command is started.\n\t\t\t" + "The -n flag requests QEMU to reuse the image found\n\t\t\t" + "in new-image-file, instead of recreating it from scratch.\n\t\t\t" + "The -f flag requests QEMU to copy the whole disk,\n\t\t\t" + "so that the result does not need a backing file.\n\t\t\t", + .mhandler.cmd = hmp_drive_mirror, + }, +STEXI +@item drive_mirror +@findex drive_mirror +Start mirroring a block device's writes to a new destination, +using the specified target. +ETEXI + + { .name = "drive_add", .args_type = "pci_addr:s,opts:s", .params = "[[<domain>:]<bus>:]<slot>\n" @@ -770,6 +770,35 @@ void hmp_block_resize(Monitor *mon, const QDict *qdict) hmp_handle_error(mon, &errp); } +void hmp_drive_mirror(Monitor *mon, const QDict *qdict) +{ + const char *device = qdict_get_str(qdict, "device"); + const char *filename = qdict_get_str(qdict, "target"); + const char *format = qdict_get_try_str(qdict, "format"); + int reuse = qdict_get_try_bool(qdict, "reuse", 0); + int full = qdict_get_try_bool(qdict, "full", 0); + enum NewImageMode mode; + Error *errp = NULL; + + if (!filename) { + error_set(&errp, QERR_MISSING_PARAMETER, "target"); + hmp_handle_error(mon, &errp); + return; + } + + if (reuse) { + mode = NEW_IMAGE_MODE_EXISTING; + } else { + mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; + } + + qmp_drive_mirror(device, filename, !!format, format, + full ? 
MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP, + true, mode, false, 0, + false, 0, false, 0, &errp); + hmp_handle_error(mon, &errp); +} + void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict) { const char *device = qdict_get_str(qdict, "device"); @@ -989,6 +1018,16 @@ void hmp_block_job_resume(Monitor *mon, const QDict *qdict) hmp_handle_error(mon, &error); } +void hmp_block_job_complete(Monitor *mon, const QDict *qdict) +{ + Error *error = NULL; + const char *device = qdict_get_str(qdict, "device"); + + qmp_block_job_complete(device, &error); + + hmp_handle_error(mon, &error); +} + typedef struct MigrationStatus { QEMUTimer *timer; @@ -51,6 +51,7 @@ void hmp_block_passwd(Monitor *mon, const QDict *qdict); void hmp_balloon(Monitor *mon, const QDict *qdict); void hmp_block_resize(Monitor *mon, const QDict *qdict); void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict); +void hmp_drive_mirror(Monitor *mon, const QDict *qdict); void hmp_migrate_cancel(Monitor *mon, const QDict *qdict); void hmp_migrate_set_downtime(Monitor *mon, const QDict *qdict); void hmp_migrate_set_speed(Monitor *mon, const QDict *qdict); @@ -66,6 +67,7 @@ void hmp_block_job_set_speed(Monitor *mon, const QDict *qdict); void hmp_block_job_cancel(Monitor *mon, const QDict *qdict); void hmp_block_job_pause(Monitor *mon, const QDict *qdict); void hmp_block_job_resume(Monitor *mon, const QDict *qdict); +void hmp_block_job_complete(Monitor *mon, const QDict *qdict); void hmp_migrate(Monitor *mon, const QDict *qdict); void hmp_device_del(Monitor *mon, const QDict *qdict); void hmp_dump_guest_memory(Monitor *mon, const QDict *qdict); @@ -451,6 +451,7 @@ static const char *monitor_event_names[] = { [QEVENT_BLOCK_JOB_COMPLETED] = "BLOCK_JOB_COMPLETED", [QEVENT_BLOCK_JOB_CANCELLED] = "BLOCK_JOB_CANCELLED", [QEVENT_BLOCK_JOB_ERROR] = "BLOCK_JOB_ERROR", + [QEVENT_BLOCK_JOB_READY] = "BLOCK_JOB_READY", [QEVENT_DEVICE_TRAY_MOVED] = "DEVICE_TRAY_MOVED", [QEVENT_SUSPEND] = "SUSPEND", [QEVENT_SUSPEND_DISK] 
= "SUSPEND_DISK", @@ -2105,8 +2106,9 @@ static void monitor_fdset_cleanup(MonFdset *mon_fdset) MonFdsetFd *mon_fdset_fd_next; QLIST_FOREACH_SAFE(mon_fdset_fd, &mon_fdset->fds, next, mon_fdset_fd_next) { - if (mon_fdset_fd->removed || - (QLIST_EMPTY(&mon_fdset->dup_fds) && mon_refcount == 0)) { + if ((mon_fdset_fd->removed || + (QLIST_EMPTY(&mon_fdset->dup_fds) && mon_refcount == 0)) && + runstate_is_running()) { close(mon_fdset_fd->fd); g_free(mon_fdset_fd->opaque); QLIST_REMOVE(mon_fdset_fd, next); @@ -2135,8 +2137,6 @@ AddfdInfo *qmp_add_fd(bool has_fdset_id, int64_t fdset_id, bool has_opaque, { int fd; Monitor *mon = cur_mon; - MonFdset *mon_fdset; - MonFdsetFd *mon_fdset_fd; AddfdInfo *fdinfo; fd = qemu_chr_fe_get_msgfd(mon->chr); @@ -2145,57 +2145,11 @@ AddfdInfo *qmp_add_fd(bool has_fdset_id, int64_t fdset_id, bool has_opaque, goto error; } - if (has_fdset_id) { - QLIST_FOREACH(mon_fdset, &mon_fdsets, next) { - if (mon_fdset->id == fdset_id) { - break; - } - } - if (mon_fdset == NULL) { - error_set(errp, QERR_INVALID_PARAMETER_VALUE, "fdset-id", - "an existing fdset-id"); - goto error; - } - } else { - int64_t fdset_id_prev = -1; - MonFdset *mon_fdset_cur = QLIST_FIRST(&mon_fdsets); - - /* Use first available fdset ID */ - QLIST_FOREACH(mon_fdset, &mon_fdsets, next) { - mon_fdset_cur = mon_fdset; - if (fdset_id_prev == mon_fdset_cur->id - 1) { - fdset_id_prev = mon_fdset_cur->id; - continue; - } - break; - } - - mon_fdset = g_malloc0(sizeof(*mon_fdset)); - mon_fdset->id = fdset_id_prev + 1; - - /* The fdset list is ordered by fdset ID */ - if (mon_fdset->id == 0) { - QLIST_INSERT_HEAD(&mon_fdsets, mon_fdset, next); - } else if (mon_fdset->id < mon_fdset_cur->id) { - QLIST_INSERT_BEFORE(mon_fdset_cur, mon_fdset, next); - } else { - QLIST_INSERT_AFTER(mon_fdset_cur, mon_fdset, next); - } - } - - mon_fdset_fd = g_malloc0(sizeof(*mon_fdset_fd)); - mon_fdset_fd->fd = fd; - mon_fdset_fd->removed = false; - if (has_opaque) { - mon_fdset_fd->opaque = 
g_strdup(opaque); + fdinfo = monitor_fdset_add_fd(fd, has_fdset_id, fdset_id, + has_opaque, opaque, errp); + if (fdinfo) { + return fdinfo; } - QLIST_INSERT_HEAD(&mon_fdset->fds, mon_fdset_fd, next); - - fdinfo = g_malloc0(sizeof(*fdinfo)); - fdinfo->fdset_id = mon_fdset->id; - fdinfo->fd = mon_fdset_fd->fd; - - return fdinfo; error: if (fd != -1) { @@ -2281,6 +2235,87 @@ FdsetInfoList *qmp_query_fdsets(Error **errp) return fdset_list; } +AddfdInfo *monitor_fdset_add_fd(int fd, bool has_fdset_id, int64_t fdset_id, + bool has_opaque, const char *opaque, + Error **errp) +{ + MonFdset *mon_fdset = NULL; + MonFdsetFd *mon_fdset_fd; + AddfdInfo *fdinfo; + + if (has_fdset_id) { + QLIST_FOREACH(mon_fdset, &mon_fdsets, next) { + /* Break if match found or match impossible due to ordering by ID */ + if (fdset_id <= mon_fdset->id) { + if (fdset_id < mon_fdset->id) { + mon_fdset = NULL; + } + break; + } + } + } + + if (mon_fdset == NULL) { + int64_t fdset_id_prev = -1; + MonFdset *mon_fdset_cur = QLIST_FIRST(&mon_fdsets); + + if (has_fdset_id) { + if (fdset_id < 0) { + error_set(errp, QERR_INVALID_PARAMETER_VALUE, "fdset-id", + "a non-negative value"); + return NULL; + } + /* Use specified fdset ID */ + QLIST_FOREACH(mon_fdset, &mon_fdsets, next) { + mon_fdset_cur = mon_fdset; + if (fdset_id < mon_fdset_cur->id) { + break; + } + } + } else { + /* Use first available fdset ID */ + QLIST_FOREACH(mon_fdset, &mon_fdsets, next) { + mon_fdset_cur = mon_fdset; + if (fdset_id_prev == mon_fdset_cur->id - 1) { + fdset_id_prev = mon_fdset_cur->id; + continue; + } + break; + } + } + + mon_fdset = g_malloc0(sizeof(*mon_fdset)); + if (has_fdset_id) { + mon_fdset->id = fdset_id; + } else { + mon_fdset->id = fdset_id_prev + 1; + } + + /* The fdset list is ordered by fdset ID */ + if (!mon_fdset_cur) { + QLIST_INSERT_HEAD(&mon_fdsets, mon_fdset, next); + } else if (mon_fdset->id < mon_fdset_cur->id) { + QLIST_INSERT_BEFORE(mon_fdset_cur, mon_fdset, next); + } else { + 
QLIST_INSERT_AFTER(mon_fdset_cur, mon_fdset, next); + } + } + + mon_fdset_fd = g_malloc0(sizeof(*mon_fdset_fd)); + mon_fdset_fd->fd = fd; + mon_fdset_fd->removed = false; + if (has_opaque) { + mon_fdset_fd->opaque = g_strdup(opaque); + } + QLIST_INSERT_HEAD(&mon_fdset->fds, mon_fdset_fd, next); + + fdinfo = g_malloc0(sizeof(*fdinfo)); + fdinfo->fdset_id = mon_fdset->id; + fdinfo->fd = mon_fdset_fd->fd; + + return fdinfo; +} + int monitor_fdset_get_fd(int64_t fdset_id, int flags) { #ifndef _WIN32 @@ -39,6 +39,7 @@ typedef enum MonitorEvent { QEVENT_BLOCK_JOB_COMPLETED, QEVENT_BLOCK_JOB_CANCELLED, QEVENT_BLOCK_JOB_ERROR, + QEVENT_BLOCK_JOB_READY, QEVENT_DEVICE_TRAY_MOVED, QEVENT_SUSPEND, QEVENT_SUSPEND_DISK, @@ -90,6 +91,9 @@ int qmp_qom_set(Monitor *mon, const QDict *qdict, QObject **ret); int qmp_qom_get(Monitor *mon, const QDict *qdict, QObject **ret); +AddfdInfo *monitor_fdset_add_fd(int fd, bool has_fdset_id, int64_t fdset_id, + bool has_opaque, const char *opaque, + Error **errp); int monitor_fdset_get_fd(int64_t fdset_id, int flags); int monitor_fdset_dup_fd_add(int64_t fdset_id, int dup_fd); int monitor_fdset_dup_fd_remove(int dup_fd); @@ -88,7 +88,6 @@ static int qemu_dup_flags(int fd, int flags) int ret; int serrno; int dup_flags; - int setfl_flags; #ifdef F_DUPFD_CLOEXEC ret = fcntl(fd, F_DUPFD_CLOEXEC, 0); @@ -113,16 +112,7 @@ static int qemu_dup_flags(int fd, int flags) } /* Set/unset flags that we can with fcntl */ - setfl_flags = O_APPEND | O_ASYNC | O_NONBLOCK; -#ifdef O_NOATIME - setfl_flags |= O_NOATIME; -#endif -#ifdef O_DIRECT - setfl_flags |= O_DIRECT; -#endif - dup_flags &= ~setfl_flags; - dup_flags |= (flags & setfl_flags); - if (fcntl(ret, F_SETFL, dup_flags) == -1) { + if (fcntl(ret, F_SETFL, flags) == -1) { goto fail; } diff --git a/qapi-schema.json b/qapi-schema.json index 68766ae..542e3ac 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -661,6 +661,18 @@ { 'enum': 'BlockDeviceIoStatus', 'data': [ 'ok', 'failed', 'nospace' ] } ## +# 
@BlockDirtyInfo: +# +# Block dirty bitmap information. +# +# @count: number of dirty bytes according to the dirty bitmap +# +# Since: 1.3 +## +{ 'type': 'BlockDirtyInfo', + 'data': {'count': 'int'} } + +## # @BlockInfo: # # Block device information. This structure describes a virtual device and @@ -679,6 +691,9 @@ # @tray_open: #optional True if the device has a tray and it is open # (only present if removable is true) # +# @dirty: #optional dirty bitmap information (only present if the dirty +# bitmap is enabled) +# # @io-status: #optional @BlockDeviceIoStatus. Only present if the device # supports it and the VM is configured to stop on errors # @@ -690,7 +705,8 @@ { 'type': 'BlockInfo', 'data': {'device': 'str', 'type': 'str', 'removable': 'bool', 'locked': 'bool', '*inserted': 'BlockDeviceInfo', - '*tray_open': 'bool', '*io-status': 'BlockDeviceIoStatus'} } + '*tray_open': 'bool', '*io-status': 'BlockDeviceIoStatus', + '*dirty': 'BlockDirtyInfo' } } ## # @query-block: @@ -1150,6 +1166,23 @@ 'data': ['report', 'ignore', 'enospc', 'stop'] } ## +# @MirrorSyncMode: +# +# An enumeration of possible behaviors for the initial synchronization +# phase of storage mirroring. +# +# @top: copies data in the topmost image to the destination +# +# @full: copies data from all images to the destination +# +# @none: only copy data written from now on +# +# Since: 1.3 +## +{ 'enum': 'MirrorSyncMode', + 'data': ['top', 'full', 'none'] } + +## # @BlockJobInfo: # # Information about a long-running block device operation. @@ -1578,6 +1611,49 @@ 'data': { 'device': 'str', '*base': 'str', 'top': 'str', '*speed': 'int' } } +## +# @drive-mirror +# +# Start mirroring a block device's writes to a new destination. +# +# @device: the name of the device whose writes should be mirrored. +# +# @target: the target of the new image. If the file exists, or if it +# is a device, the existing file/device will be used as the new +# destination. If it does not exist, a new file will be created. 
+# +# @format: #optional the format of the new destination, default is to +# probe if @mode is 'existing', else the format of the source +# +# @mode: #optional whether and how QEMU should create a new image, default is +# 'absolute-paths'. +# +# @speed: #optional the maximum speed, in bytes per second +# +# @sync: what parts of the disk image should be copied to the destination +# (all the disk, only the sectors allocated in the topmost image, or +# only new I/O). +# +# @on-source-error: #optional the action to take on an error on the source, +# default 'report'. 'stop' and 'enospc' can only be used +# if the block device supports io-status (see BlockInfo). +# +# @on-target-error: #optional the action to take on an error on the target, +# default 'report' (no limitations, since this applies to +# a different block device than @device). +# +# Returns: nothing on success +# If @device is not a valid block device, DeviceNotFound +# +# Since 1.3 +## +{ 'command': 'drive-mirror', + 'data': { 'device': 'str', 'target': 'str', '*format': 'str', + 'sync': 'MirrorSyncMode', '*mode': 'NewImageMode', + '*speed': 'int', '*on-source-error': 'BlockdevOnError', + '*on-target-error': 'BlockdevOnError' } } + +## # @migrate_cancel # # Cancel the current executing migration process. @@ -2022,6 +2098,32 @@ { 'command': 'block-job-resume', 'data': { 'device': 'str' } } ## +# @block-job-complete: +# +# Manually trigger completion of an active background block operation. This +# is supported for drive mirroring, where it also switches the device to +# write to the target path only. The ability to complete is signaled with +# a BLOCK_JOB_READY event. +# +# This command completes an active background block operation synchronously. +# The ordering of this command's return with the BLOCK_JOB_COMPLETED event +# is not defined. 
Note that if an I/O error occurs during the processing of +# this command: 1) the command itself will fail; 2) the error will be processed +# according to the rerror/werror arguments that were specified when starting +# the operation. +# +# A cancelled or paused job cannot be completed. +# +# @device: the device name +# +# Returns: Nothing on success +# If no background operation is active on this device, DeviceNotActive +# +# Since: 1.3 +## +{ 'command': 'block-job-complete', 'data': { 'device': 'str' } } + +## # @ObjectTypeInfo: # # This structure describes a search result from @qom-list-types @@ -2683,7 +2785,7 @@ # # Returns: @AddfdInfo on success # If file descriptor was not received, FdNotSupplied -# If @fdset-id does not exist, InvalidParameterValue +# If @fdset-id is a negative value, InvalidParameterValue # # Notes: The list of fd sets is shared by all monitor connections. # diff --git a/qemu-config.c b/qemu-config.c index 97ffb97..e854fff 100644 --- a/qemu-config.c +++ b/qemu-config.c @@ -657,6 +657,27 @@ QemuOptsList qemu_boot_opts = { }, }; +static QemuOptsList qemu_add_fd_opts = { + .name = "add-fd", + .head = QTAILQ_HEAD_INITIALIZER(qemu_add_fd_opts.head), + .desc = { + { + .name = "fd", + .type = QEMU_OPT_NUMBER, + .help = "file descriptor of which a duplicate is added to fd set", + },{ + .name = "set", + .type = QEMU_OPT_NUMBER, + .help = "ID of the fd set to add fd to", + },{ + .name = "opaque", + .type = QEMU_OPT_STRING, + .help = "free-form string used to describe fd", + }, + { /* end of list */ } + }, +}; + static QemuOptsList *vm_config_groups[32] = { &qemu_drive_opts, &qemu_chardev_opts, @@ -673,6 +694,7 @@ static QemuOptsList *vm_config_groups[32] = { &qemu_boot_opts, &qemu_iscsi_opts, &qemu_sandbox_opts, + &qemu_add_fd_opts, NULL, }; diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx index 0ef82e9..a181363 100644 --- a/qemu-img-cmds.hx +++ b/qemu-img-cmds.hx @@ -34,9 +34,9 @@ STEXI ETEXI DEF("info", img_info, - "info [-f fmt] [--output=ofmt] 
filename") + "info [-f fmt] [--output=ofmt] [--backing-chain] filename") STEXI -@item info [-f @var{fmt}] [--output=@var{ofmt}] @var{filename} +@item info [-f @var{fmt}] [--output=@var{ofmt}] [--backing-chain] @var{filename} ETEXI DEF("snapshot", img_snapshot, @@ -674,7 +674,7 @@ static int img_convert(int argc, char **argv) QEMUOptionParameter *out_baseimg_param; char *options = NULL; const char *snapshot_name = NULL; - float local_progress; + float local_progress = 0; int min_sparse = 8; /* Need at least 4k of zeros for sparse detection */ fmt = NULL; @@ -914,8 +914,10 @@ static int img_convert(int argc, char **argv) sector_num = 0; nb_sectors = total_sectors; - local_progress = (float)100 / - (nb_sectors / MIN(nb_sectors, cluster_sectors)); + if (nb_sectors != 0) { + local_progress = (float)100 / + (nb_sectors / MIN(nb_sectors, cluster_sectors)); + } for(;;) { int64_t bs_num; @@ -986,8 +988,10 @@ static int img_convert(int argc, char **argv) sector_num = 0; // total number of sectors converted so far nb_sectors = total_sectors - sector_num; - local_progress = (float)100 / - (nb_sectors / MIN(nb_sectors, IO_BUF_SIZE / 512)); + if (nb_sectors != 0) { + local_progress = (float)100 / + (nb_sectors / MIN(nb_sectors, IO_BUF_SIZE / 512)); + } for(;;) { nb_sectors = total_sectors - sector_num; @@ -1108,6 +1112,23 @@ static void dump_snapshots(BlockDriverState *bs) g_free(sn_tab); } +static void dump_json_image_info_list(ImageInfoList *list) +{ + Error *errp = NULL; + QString *str; + QmpOutputVisitor *ov = qmp_output_visitor_new(); + QObject *obj; + visit_type_ImageInfoList(qmp_output_get_visitor(ov), + &list, NULL, &errp); + obj = qmp_output_get_qobject(ov); + str = qobject_to_json_pretty(obj); + assert(str != NULL); + printf("%s\n", qstring_get_str(str)); + qobject_decref(obj); + qmp_output_visitor_cleanup(ov); + QDECREF(str); +} + static void collect_snapshots(BlockDriverState *bs , ImageInfo *info) { int i, sn_count; @@ -1247,9 +1268,129 @@ static void 
dump_human_image_info(ImageInfo *info) printf("backing file format: %s\n", info->backing_filename_format); } } + + if (info->has_snapshots) { + SnapshotInfoList *elem; + char buf[256]; + + printf("Snapshot list:\n"); + printf("%s\n", bdrv_snapshot_dump(buf, sizeof(buf), NULL)); + + /* Ideally bdrv_snapshot_dump() would operate on SnapshotInfoList but + * we convert to the block layer's native QEMUSnapshotInfo for now. + */ + for (elem = info->snapshots; elem; elem = elem->next) { + QEMUSnapshotInfo sn = { + .vm_state_size = elem->value->vm_state_size, + .date_sec = elem->value->date_sec, + .date_nsec = elem->value->date_nsec, + .vm_clock_nsec = elem->value->vm_clock_sec * 1000000000ULL + + elem->value->vm_clock_nsec, + }; + + pstrcpy(sn.id_str, sizeof(sn.id_str), elem->value->id); + pstrcpy(sn.name, sizeof(sn.name), elem->value->name); + printf("%s\n", bdrv_snapshot_dump(buf, sizeof(buf), &sn)); + } + } } -enum {OPTION_OUTPUT = 256}; +static void dump_human_image_info_list(ImageInfoList *list) +{ + ImageInfoList *elem; + bool delim = false; + + for (elem = list; elem; elem = elem->next) { + if (delim) { + printf("\n"); + } + delim = true; + + dump_human_image_info(elem->value); + } +} + +static gboolean str_equal_func(gconstpointer a, gconstpointer b) +{ + return strcmp(a, b) == 0; +} + +/** + * Open an image file chain and return an ImageInfoList + * + * @filename: topmost image filename + * @fmt: topmost image format (may be NULL to autodetect) + * @chain: true - enumerate entire backing file chain + * false - only topmost image file + * + * Returns a list of ImageInfo objects or NULL if there was an error opening an + * image file. If there was an error a message will have been printed to + * stderr. 
+ */ +static ImageInfoList *collect_image_info_list(const char *filename, + const char *fmt, + bool chain) +{ + ImageInfoList *head = NULL; + ImageInfoList **last = &head; + GHashTable *filenames; + + filenames = g_hash_table_new_full(g_str_hash, str_equal_func, NULL, NULL); + + while (filename) { + BlockDriverState *bs; + ImageInfo *info; + ImageInfoList *elem; + + if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) { + error_report("Backing file '%s' creates an infinite loop.", + filename); + goto err; + } + g_hash_table_insert(filenames, (gpointer)filename, NULL); + + bs = bdrv_new_open(filename, fmt, BDRV_O_FLAGS | BDRV_O_NO_BACKING, + false); + if (!bs) { + goto err; + } + + info = g_new0(ImageInfo, 1); + collect_image_info(bs, info, filename, fmt); + collect_snapshots(bs, info); + + elem = g_new0(ImageInfoList, 1); + elem->value = info; + *last = elem; + last = &elem->next; + + bdrv_delete(bs); + + filename = fmt = NULL; + if (chain) { + if (info->has_full_backing_filename) { + filename = info->full_backing_filename; + } else if (info->has_backing_filename) { + filename = info->backing_filename; + } + if (info->has_backing_filename_format) { + fmt = info->backing_filename_format; + } + } + } + g_hash_table_destroy(filenames); + return head; + +err: + qapi_free_ImageInfoList(head); + g_hash_table_destroy(filenames); + return NULL; +} + +enum { + OPTION_OUTPUT = 256, + OPTION_BACKING_CHAIN = 257, +}; typedef enum OutputFormat { OFORMAT_JSON, @@ -1260,9 +1401,9 @@ static int img_info(int argc, char **argv) { int c; OutputFormat output_format = OFORMAT_HUMAN; + bool chain = false; const char *filename, *fmt, *output; - BlockDriverState *bs; - ImageInfo *info; + ImageInfoList *list; fmt = NULL; output = NULL; @@ -1272,6 +1413,7 @@ static int img_info(int argc, char **argv) {"help", no_argument, 0, 'h'}, {"format", required_argument, 0, 'f'}, {"output", required_argument, 0, OPTION_OUTPUT}, + {"backing-chain", no_argument, 0, OPTION_BACKING_CHAIN}, 
{0, 0, 0, 0} }; c = getopt_long(argc, argv, "f:h", @@ -1290,6 +1432,9 @@ static int img_info(int argc, char **argv) case OPTION_OUTPUT: output = optarg; break; + case OPTION_BACKING_CHAIN: + chain = true; + break; } } if (optind >= argc) { @@ -1306,27 +1451,25 @@ static int img_info(int argc, char **argv) return 1; } - bs = bdrv_new_open(filename, fmt, BDRV_O_FLAGS | BDRV_O_NO_BACKING, false); - if (!bs) { + list = collect_image_info_list(filename, fmt, chain); + if (!list) { return 1; } - info = g_new0(ImageInfo, 1); - collect_image_info(bs, info, filename, fmt); - switch (output_format) { case OFORMAT_HUMAN: - dump_human_image_info(info); - dump_snapshots(bs); + dump_human_image_info_list(list); break; case OFORMAT_JSON: - collect_snapshots(bs, info); - dump_json_image_info(info); + if (chain) { + dump_json_image_info_list(list); + } else { + dump_json_image_info(list->value); + } break; } - qapi_free_ImageInfo(info); - bdrv_delete(bs); + qapi_free_ImageInfoList(list); return 0; } @@ -1558,13 +1701,15 @@ static int img_rebase(int argc, char **argv) error_report("Could not open old backing file '%s'", backing_name); goto out; } - - bs_new_backing = bdrv_new("new_backing"); - ret = bdrv_open(bs_new_backing, out_baseimg, BDRV_O_FLAGS, + if (out_baseimg[0]) { + bs_new_backing = bdrv_new("new_backing"); + ret = bdrv_open(bs_new_backing, out_baseimg, BDRV_O_FLAGS, new_backing_drv); - if (ret) { - error_report("Could not open new backing file '%s'", out_baseimg); - goto out; + if (ret) { + error_report("Could not open new backing file '%s'", + out_baseimg); + goto out; + } } } @@ -1580,22 +1725,27 @@ static int img_rebase(int argc, char **argv) if (!unsafe) { uint64_t num_sectors; uint64_t old_backing_num_sectors; - uint64_t new_backing_num_sectors; + uint64_t new_backing_num_sectors = 0; uint64_t sector; int n; uint8_t * buf_old; uint8_t * buf_new; - float local_progress; + float local_progress = 0; buf_old = qemu_blockalign(bs, IO_BUF_SIZE); buf_new = 
qemu_blockalign(bs, IO_BUF_SIZE); bdrv_get_geometry(bs, &num_sectors); bdrv_get_geometry(bs_old_backing, &old_backing_num_sectors); - bdrv_get_geometry(bs_new_backing, &new_backing_num_sectors); + if (bs_new_backing) { + bdrv_get_geometry(bs_new_backing, &new_backing_num_sectors); + } + + if (num_sectors != 0) { + local_progress = (float)100 / + (num_sectors / MIN(num_sectors, IO_BUF_SIZE / 512)); + } - local_progress = (float)100 / - (num_sectors / MIN(num_sectors, IO_BUF_SIZE / 512)); for (sector = 0; sector < num_sectors; sector += n) { /* How many sectors can we handle with the next read? */ @@ -1629,7 +1779,7 @@ static int img_rebase(int argc, char **argv) } } - if (sector >= new_backing_num_sectors) { + if (sector >= new_backing_num_sectors || !bs_new_backing) { memset(buf_new, 0, n * BDRV_SECTOR_SIZE); } else { if (sector + n > new_backing_num_sectors) { @@ -1675,7 +1825,12 @@ static int img_rebase(int argc, char **argv) * backing file are overwritten in the COW file now, so the visible content * doesn't change when we switch the backing file. */ - ret = bdrv_change_backing_file(bs, out_baseimg, out_basefmt); + if (out_baseimg && *out_baseimg) { + ret = bdrv_change_backing_file(bs, out_baseimg, out_basefmt); + } else { + ret = bdrv_change_backing_file(bs, NULL, NULL); + } + if (ret == -ENOSPC) { error_report("Could not change the backing file to '%s': No " "space left in the file header", out_baseimg); diff --git a/qemu-img.texi b/qemu-img.texi index 8b05f2c..60b83fc 100644 --- a/qemu-img.texi +++ b/qemu-img.texi @@ -28,6 +28,10 @@ Command parameters: is the disk image format. It is guessed automatically in most cases. See below for a description of the supported disk formats. +@item --backing-chain +will enumerate information about backing files in a disk image chain. Refer +below for further description. + @item size is the disk image size in bytes. 
Optional suffixes @code{k} or @code{K} (kilobyte, 1024) @code{M} (megabyte, 1024k) and @code{G} (gigabyte, 1024M) @@ -129,7 +133,7 @@ created as a copy on write image of the specified base image; the @var{backing_file} should have the same content as the input's base image, however the path, image format, etc may differ. -@item info [-f @var{fmt}] [--output=@var{ofmt}] @var{filename} +@item info [-f @var{fmt}] [--output=@var{ofmt}] [--backing-chain] @var{filename} Give information about the disk image @var{filename}. Use it in particular to know the size reserved on disk which can be different @@ -137,6 +141,21 @@ from the displayed size. If VM snapshots are stored in the disk image, they are displayed too. The command can output in the format @var{ofmt} which is either @code{human} or @code{json}. +If a disk image has a backing file chain, information about each disk image in +the chain can be recursively enumerated by using the option @code{--backing-chain}. + +For instance, if you have an image chain like: + +@example +base.qcow2 <- snap1.qcow2 <- snap2.qcow2 +@end example + +To enumerate information about each disk image in the above chain, starting from top to base, do: + +@example +qemu-img info --backing-chain snap2.qcow2 +@end example + @item snapshot [-l | -a @var{snapshot} | -c @var{snapshot} | -d @var{snapshot} ] @var{filename} List, apply, create or delete snapshots in image @var{filename}. @@ -148,7 +167,9 @@ Changes the backing file of an image. Only the formats @code{qcow2} and The backing file is changed to @var{backing_file} and (if the image format of @var{filename} supports this) the backing file format is changed to -@var{backing_fmt}. +@var{backing_fmt}. If @var{backing_file} is specified as ``'' (the empty +string), then the image is rebased onto no backing file (i.e. it will exist +independently of any backing file). 
There are two different modes in which @code{rebase} can operate: @table @option diff --git a/qemu-options.hx b/qemu-options.hx index 46f0539..a67a255 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -253,6 +253,14 @@ qemu-system-i386 -drive file=file,index=2,media=disk qemu-system-i386 -drive file=file,index=3,media=disk @end example +You can open an image using pre-opened file descriptors from an fd set: +@example +qemu-system-i386 +-add-fd fd=3,set=2,opaque="rdwr:/path/to/file" +-add-fd fd=4,set=2,opaque="rdonly:/path/to/file" +-drive file=/dev/fdset/2,index=0,media=disk +@end example + You can connect a CDROM to the slave of ide0: @example qemu-system-i386 -drive file=file,if=ide,index=1,media=cdrom @@ -285,6 +293,34 @@ qemu-system-i386 -hda a -hdb b @end example ETEXI +DEF("add-fd", HAS_ARG, QEMU_OPTION_add_fd, + "-add-fd fd=fd,set=set[,opaque=opaque]\n" + " Add 'fd' to fd 'set'\n", QEMU_ARCH_ALL) +STEXI +@item -add-fd fd=@var{fd},set=@var{set}[,opaque=@var{opaque}] +@findex -add-fd + +Add a file descriptor to an fd set. Valid options are: + +@table @option +@item fd=@var{fd} +This option defines the file descriptor of which a duplicate is added to fd set. +The file descriptor cannot be stdin, stdout, or stderr. +@item set=@var{set} +This option defines the ID of the fd set to add the file descriptor to. +@item opaque=@var{opaque} +This option defines a free-form string that can be used to describe @var{fd}. 
+@end table + +You can open an image using pre-opened file descriptors from an fd set: +@example +qemu-system-i386 +-add-fd fd=3,set=2,opaque="rdwr:/path/to/file" +-add-fd fd=4,set=2,opaque="rdonly:/path/to/file" +-drive file=/dev/fdset/2,index=0,media=disk +@end example +ETEXI + DEF("set", HAS_ARG, QEMU_OPTION_set, "-set group.id.arg=value\n" " set <arg> parameter for item <id> of type <group>\n" @@ -54,6 +54,9 @@ void assert_no_error(Error *err); #define QERR_BLOCK_JOB_PAUSED \ ERROR_CLASS_GENERIC_ERROR, "The block job for device '%s' is currently paused" +#define QERR_BLOCK_JOB_NOT_READY \ + ERROR_CLASS_GENERIC_ERROR, "The active block job for device '%s' cannot be completed" + #define QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED \ ERROR_CLASS_GENERIC_ERROR, "Block format '%s' used by device '%s' does not support feature '%s'" diff --git a/qmp-commands.hx b/qmp-commands.hx index ebe9a78..5c692d0 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -843,6 +843,11 @@ EQMP .mhandler.cmd_new = qmp_marshal_input_block_job_resume, }, { + .name = "block-job-complete", + .args_type = "device:B", + .mhandler.cmd_new = qmp_marshal_input_block_job_complete, + }, + { .name = "transaction", .args_type = "actions:q", .mhandler.cmd_new = qmp_marshal_input_transaction, @@ -931,6 +936,54 @@ Example: EQMP { + .name = "drive-mirror", + .args_type = "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?," + "on-source-error:s?,on-target-error:s?", + .mhandler.cmd_new = qmp_marshal_input_drive_mirror, + }, + +SQMP +drive-mirror +------------ + +Start mirroring a block device's writes to a new destination. target +specifies the target of the new image. If the file exists, or if it is +a device, it will be used as the new destination for writes. If it does not +exist, a new file will be created. format specifies the format of the +mirror image, default is to probe if mode='existing', else the format +of the source. 
+ +Arguments: + +- "device": device name to operate on (json-string) +- "target": name of new image file (json-string) +- "format": format of new image (json-string, optional) +- "mode": how an image file should be created into the target + file/device (NewImageMode, optional, default 'absolute-paths') +- "speed": maximum speed of the streaming job, in bytes per second + (json-int) +- "sync": what parts of the disk image should be copied to the destination; + possibilities include "full" for all the disk, "top" for only the sectors + allocated in the topmost image, or "none" to only replicate new I/O + (MirrorSyncMode). +- "on-source-error": the action to take on an error on the source + (BlockdevOnError, default 'report') +- "on-target-error": the action to take on an error on the target + (BlockdevOnError, default 'report') + + + +Example: + +-> { "execute": "drive-mirror", "arguments": { "device": "ide-hd0", + "target": "/some/place/my-image", + "sync": "full", + "format": "qcow2" } } +<- { "return": {} } + +EQMP + + { .name = "balloon", .args_type = "value:M", .mhandler.cmd_new = qmp_marshal_input_balloon, diff --git a/tests/qemu-iotests/040 b/tests/qemu-iotests/040 index 258e7ea..aad535a 100755 --- a/tests/qemu-iotests/040 +++ b/tests/qemu-iotests/040 @@ -26,6 +26,7 @@ import os import iotests from iotests import qemu_img, qemu_io import struct +import errno backing_img = os.path.join(iotests.test_dir, 'backing.img') mid_img = os.path.join(iotests.test_dir, 'mid.img') @@ -111,7 +112,7 @@ class TestSingleDrive(ImageCommitTestCase): self.assert_no_active_commit() result = self.vm.qmp('block-commit', device='drive0', top='%s' % backing_img, base='%s' % backing_img) self.assert_qmp(result, 'error/class', 'GenericError') - self.assert_qmp(result, 'error/desc', 'Invalid files for merge: top and base are the same') + self.assert_qmp(result, 'error/desc', 'Base \'%s\' not found' % backing_img) def test_top_invalid(self): self.assert_no_active_commit() @@ -135,7 +136,7 
@@ class TestSingleDrive(ImageCommitTestCase): self.assert_no_active_commit() result = self.vm.qmp('block-commit', device='drive0', top='%s' % backing_img, base='%s' % mid_img) self.assert_qmp(result, 'error/class', 'GenericError') - self.assert_qmp(result, 'error/desc', 'Base (%(1)s) is not reachable from top (%(2)s)' % {"1" : mid_img, "2" : backing_img}) + self.assert_qmp(result, 'error/desc', 'Base \'%s\' not found' % mid_img) def test_top_omitted(self): self.assert_no_active_commit() @@ -143,6 +144,107 @@ class TestSingleDrive(ImageCommitTestCase): self.assert_qmp(result, 'error/class', 'GenericError') self.assert_qmp(result, 'error/desc', "Parameter 'top' is missing") +class TestRelativePaths(ImageCommitTestCase): + image_len = 1 * 1024 * 1024 + test_len = 1 * 1024 * 256 + + dir1 = "dir1" + dir2 = "dir2/" + dir3 = "dir2/dir3/" + + test_img = os.path.join(iotests.test_dir, dir3, 'test.img') + mid_img = "../mid.img" + backing_img = "../dir1/backing.img" + + backing_img_abs = os.path.join(iotests.test_dir, dir1, 'backing.img') + mid_img_abs = os.path.join(iotests.test_dir, dir2, 'mid.img') + + def setUp(self): + try: + os.mkdir(os.path.join(iotests.test_dir, self.dir1)) + os.mkdir(os.path.join(iotests.test_dir, self.dir2)) + os.mkdir(os.path.join(iotests.test_dir, self.dir3)) + except OSError as exception: + if exception.errno != errno.EEXIST: + raise + self.create_image(self.backing_img_abs, TestRelativePaths.image_len) + qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % self.backing_img_abs, self.mid_img_abs) + qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % self.mid_img_abs, self.test_img) + qemu_img('rebase', '-u', '-b', self.backing_img, self.mid_img_abs) + qemu_img('rebase', '-u', '-b', self.mid_img, self.test_img) + qemu_io('-c', 'write -P 0xab 0 524288', self.backing_img_abs) + qemu_io('-c', 'write -P 0xef 524288 524288', self.mid_img_abs) + self.vm = iotests.VM().add_drive(self.test_img) + self.vm.launch() + + def 
tearDown(self): + self.vm.shutdown() + os.remove(self.test_img) + os.remove(self.mid_img_abs) + os.remove(self.backing_img_abs) + try: + os.rmdir(os.path.join(iotests.test_dir, self.dir1)) + os.rmdir(os.path.join(iotests.test_dir, self.dir3)) + os.rmdir(os.path.join(iotests.test_dir, self.dir2)) + except OSError as exception: + if exception.errno != errno.EEXIST and exception.errno != errno.ENOTEMPTY: + raise + + def test_commit(self): + self.assert_no_active_commit() + result = self.vm.qmp('block-commit', device='drive0', top='%s' % self.mid_img) + self.assert_qmp(result, 'return', {}) + + completed = False + while not completed: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_COMPLETED': + self.assert_qmp(event, 'data/type', 'commit') + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/offset', self.image_len) + self.assert_qmp(event, 'data/len', self.image_len) + completed = True + + self.assert_no_active_commit() + self.vm.shutdown() + + self.assertEqual(-1, qemu_io('-c', 'read -P 0xab 0 524288', self.backing_img_abs).find("verification failed")) + self.assertEqual(-1, qemu_io('-c', 'read -P 0xef 524288 524288', self.backing_img_abs).find("verification failed")) + + def test_device_not_found(self): + result = self.vm.qmp('block-commit', device='nonexistent', top='%s' % self.mid_img) + self.assert_qmp(result, 'error/class', 'DeviceNotFound') + + def test_top_same_base(self): + self.assert_no_active_commit() + result = self.vm.qmp('block-commit', device='drive0', top='%s' % self.mid_img, base='%s' % self.mid_img) + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', 'Base \'%s\' not found' % self.mid_img) + + def test_top_invalid(self): + self.assert_no_active_commit() + result = self.vm.qmp('block-commit', device='drive0', top='badfile', base='%s' % self.backing_img) + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 
'error/desc', 'Top image file badfile not found') + + def test_base_invalid(self): + self.assert_no_active_commit() + result = self.vm.qmp('block-commit', device='drive0', top='%s' % self.mid_img, base='badfile') + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', 'Base \'badfile\' not found') + + def test_top_is_active(self): + self.assert_no_active_commit() + result = self.vm.qmp('block-commit', device='drive0', top='%s' % self.test_img, base='%s' % self.backing_img) + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', 'Top image as the active layer is currently unsupported') + + def test_top_and_base_reversed(self): + self.assert_no_active_commit() + result = self.vm.qmp('block-commit', device='drive0', top='%s' % self.backing_img, base='%s' % self.mid_img) + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', 'Base \'%s\' not found' % self.mid_img) + class TestSetSpeed(ImageCommitTestCase): image_len = 80 * 1024 * 1024 # MB diff --git a/tests/qemu-iotests/040.out b/tests/qemu-iotests/040.out index dae404e..b6f2576 100644 --- a/tests/qemu-iotests/040.out +++ b/tests/qemu-iotests/040.out @@ -1,5 +1,5 @@ -......... +................ ---------------------------------------------------------------------- -Ran 9 tests +Ran 16 tests OK diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041 new file mode 100755 index 0000000..c6eb851 --- /dev/null +++ b/tests/qemu-iotests/041 @@ -0,0 +1,615 @@ +#!/usr/bin/env python +# +# Tests for image mirroring. +# +# Copyright (C) 2012 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +import time +import os +import iotests +from iotests import qemu_img, qemu_io +import struct + +backing_img = os.path.join(iotests.test_dir, 'backing.img') +target_backing_img = os.path.join(iotests.test_dir, 'target-backing.img') +test_img = os.path.join(iotests.test_dir, 'test.img') +target_img = os.path.join(iotests.test_dir, 'target.img') + +class ImageMirroringTestCase(iotests.QMPTestCase): + '''Abstract base class for image mirroring test cases''' + + def assert_no_active_mirrors(self): + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return', []) + + def cancel_and_wait(self, drive='drive0', wait_ready=True): + '''Cancel a block job and wait for it to finish''' + if wait_ready: + ready = False + while not ready: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_READY': + self.assert_qmp(event, 'data/type', 'mirror') + self.assert_qmp(event, 'data/device', drive) + ready = True + + result = self.vm.qmp('block-job-cancel', device=drive, + force=not wait_ready) + self.assert_qmp(result, 'return', {}) + + cancelled = False + while not cancelled: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_COMPLETED' or \ + event['event'] == 'BLOCK_JOB_CANCELLED': + self.assert_qmp(event, 'data/type', 'mirror') + self.assert_qmp(event, 'data/device', drive) + if wait_ready: + self.assertEquals(event['event'], 'BLOCK_JOB_COMPLETED') + self.assert_qmp(event, 'data/offset', self.image_len) + self.assert_qmp(event, 'data/len', self.image_len) + cancelled = True + + self.assert_no_active_mirrors() + + 
def complete_and_wait(self, drive='drive0', wait_ready=True): + '''Complete a block job and wait for it to finish''' + if wait_ready: + ready = False + while not ready: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_READY': + self.assert_qmp(event, 'data/type', 'mirror') + self.assert_qmp(event, 'data/device', drive) + ready = True + + result = self.vm.qmp('block-job-complete', device=drive) + self.assert_qmp(result, 'return', {}) + + completed = False + while not completed: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_COMPLETED': + self.assert_qmp(event, 'data/type', 'mirror') + self.assert_qmp(event, 'data/device', drive) + self.assert_qmp_absent(event, 'data/error') + self.assert_qmp(event, 'data/offset', self.image_len) + self.assert_qmp(event, 'data/len', self.image_len) + completed = True + + self.assert_no_active_mirrors() + + def create_image(self, name, size): + file = open(name, 'w') + i = 0 + while i < size: + sector = struct.pack('>l504xl', i / 512, i / 512) + file.write(sector) + i = i + 512 + file.close() + + def compare_images(self, img1, img2): + try: + qemu_img('convert', '-f', iotests.imgfmt, '-O', 'raw', img1, img1 + '.raw') + qemu_img('convert', '-f', iotests.imgfmt, '-O', 'raw', img2, img2 + '.raw') + file1 = open(img1 + '.raw', 'r') + file2 = open(img2 + '.raw', 'r') + return file1.read() == file2.read() + finally: + if file1 is not None: + file1.close() + if file2 is not None: + file2.close() + try: + os.remove(img1 + '.raw') + except OSError: + pass + try: + os.remove(img2 + '.raw') + except OSError: + pass + +class TestSingleDrive(ImageMirroringTestCase): + image_len = 1 * 1024 * 1024 # MB + + def setUp(self): + self.create_image(backing_img, TestSingleDrive.image_len) + qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, test_img) + self.vm = iotests.VM().add_drive(test_img) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() 
+ os.remove(test_img) + os.remove(backing_img) + try: + os.remove(target_img) + except OSError: + pass + + def test_complete(self): + self.assert_no_active_mirrors() + + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + target=target_img) + self.assert_qmp(result, 'return', {}) + + self.complete_and_wait() + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/inserted/file', target_img) + self.vm.shutdown() + self.assertTrue(self.compare_images(test_img, target_img), + 'target image does not match source after mirroring') + + def test_cancel(self): + self.assert_no_active_mirrors() + + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + target=target_img) + self.assert_qmp(result, 'return', {}) + + self.cancel_and_wait(wait_ready=False) + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/inserted/file', test_img) + self.vm.shutdown() + + def test_cancel_after_ready(self): + self.assert_no_active_mirrors() + + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + target=target_img) + self.assert_qmp(result, 'return', {}) + + self.cancel_and_wait() + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/inserted/file', test_img) + self.vm.shutdown() + self.assertTrue(self.compare_images(test_img, target_img), + 'target image does not match source after mirroring') + + def test_pause(self): + self.assert_no_active_mirrors() + + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + target=target_img) + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('block-job-pause', device='drive0') + self.assert_qmp(result, 'return', {}) + + time.sleep(1) + result = self.vm.qmp('query-block-jobs') + offset = self.dictpath(result, 'return[0]/offset') + + time.sleep(1) + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/offset', offset) + + result = self.vm.qmp('block-job-resume', device='drive0') + 
self.assert_qmp(result, 'return', {}) + + self.complete_and_wait() + self.vm.shutdown() + self.assertTrue(self.compare_images(test_img, target_img), + 'target image does not match source after mirroring') + + def test_large_cluster(self): + self.assert_no_active_mirrors() + + qemu_img('create', '-f', iotests.imgfmt, '-o', 'cluster_size=%d,backing_file=%s' + % (TestSingleDrive.image_len, backing_img), target_img) + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + mode='existing', target=target_img) + self.assert_qmp(result, 'return', {}) + + self.complete_and_wait() + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/inserted/file', target_img) + self.vm.shutdown() + self.assertTrue(self.compare_images(test_img, target_img), + 'target image does not match source after mirroring') + + def test_medium_not_found(self): + result = self.vm.qmp('drive-mirror', device='ide1-cd0', sync='full', + target=target_img) + self.assert_qmp(result, 'error/class', 'GenericError') + + def test_image_not_found(self): + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + mode='existing', target=target_img) + self.assert_qmp(result, 'error/class', 'GenericError') + + def test_device_not_found(self): + result = self.vm.qmp('drive-mirror', device='nonexistent', sync='full', + target=target_img) + self.assert_qmp(result, 'error/class', 'DeviceNotFound') + +class TestMirrorNoBacking(ImageMirroringTestCase): + image_len = 2 * 1024 * 1024 # MB + + def complete_and_wait(self, drive='drive0', wait_ready=True): + self.create_image(target_backing_img, TestMirrorNoBacking.image_len) + return ImageMirroringTestCase.complete_and_wait(self, drive, wait_ready) + + def compare_images(self, img1, img2): + self.create_image(target_backing_img, TestMirrorNoBacking.image_len) + return ImageMirroringTestCase.compare_images(self, img1, img2) + + def setUp(self): + self.create_image(backing_img, TestMirrorNoBacking.image_len) + qemu_img('create', 
'-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, test_img) + self.vm = iotests.VM().add_drive(test_img) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + os.remove(test_img) + os.remove(backing_img) + os.remove(target_backing_img) + os.remove(target_img) + + def test_complete(self): + self.assert_no_active_mirrors() + + qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, target_img) + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + mode='existing', target=target_img) + self.assert_qmp(result, 'return', {}) + + self.complete_and_wait() + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/inserted/file', target_img) + self.vm.shutdown() + self.assertTrue(self.compare_images(test_img, target_img), + 'target image does not match source after mirroring') + + def test_cancel(self): + self.assert_no_active_mirrors() + + qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, target_img) + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + mode='existing', target=target_img) + self.assert_qmp(result, 'return', {}) + + self.cancel_and_wait() + result = self.vm.qmp('query-block') + self.assert_qmp(result, 'return[0]/inserted/file', test_img) + self.vm.shutdown() + self.assertTrue(self.compare_images(test_img, target_img), + 'target image does not match source after mirroring') + +class TestReadErrors(ImageMirroringTestCase): + image_len = 2 * 1024 * 1024 # MB + + # this should be a multiple of twice the default granularity + # so that we hit this offset first in state 1 + MIRROR_GRANULARITY = 1024 * 1024 + + def create_blkdebug_file(self, name, event, errno): + file = open(name, 'w') + file.write(''' +[inject-error] +state = "1" +event = "%s" +errno = "%d" +immediately = "off" +once = "on" +sector = "%d" + +[set-state] +state = "1" +event = "%s" +new_state = "2" + +[set-state] +state = "2" +event = "%s" +new_state = "1" +''' % (event, 
errno, self.MIRROR_GRANULARITY / 512, event, event)) + file.close() + + def setUp(self): + self.blkdebug_file = backing_img + ".blkdebug" + self.create_image(backing_img, TestReadErrors.image_len) + self.create_blkdebug_file(self.blkdebug_file, "read_aio", 5) + qemu_img('create', '-f', iotests.imgfmt, + '-o', 'backing_file=blkdebug:%s:%s,backing_fmt=raw' + % (self.blkdebug_file, backing_img), + test_img) + self.vm = iotests.VM().add_drive(test_img) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + os.remove(test_img) + os.remove(backing_img) + os.remove(self.blkdebug_file) + + def test_report_read(self): + self.assert_no_active_mirrors() + + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + target=target_img) + self.assert_qmp(result, 'return', {}) + + completed = False + error = False + while not completed: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_ERROR': + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/operation', 'read') + error = True + elif event['event'] == 'BLOCK_JOB_READY': + self.assertTrue(False, 'job completed unexpectedly') + elif event['event'] == 'BLOCK_JOB_COMPLETED': + self.assertTrue(error, 'job completed unexpectedly') + self.assert_qmp(event, 'data/type', 'mirror') + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/error', 'Input/output error') + self.assert_qmp(event, 'data/len', self.image_len) + completed = True + + self.assert_no_active_mirrors() + self.vm.shutdown() + + def test_ignore_read(self): + self.assert_no_active_mirrors() + + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + target=target_img, on_source_error='ignore') + self.assert_qmp(result, 'return', {}) + + event = self.vm.get_qmp_event(wait=True) + self.assertEquals(event['event'], 'BLOCK_JOB_ERROR') + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/operation', 'read') + result = 
self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/paused', False) + self.complete_and_wait() + self.vm.shutdown() + + def test_stop_read(self): + self.assert_no_active_mirrors() + + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + target=target_img, on_source_error='stop') + self.assert_qmp(result, 'return', {}) + + error = False + ready = False + while not ready: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_ERROR': + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/operation', 'read') + + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/paused', True) + self.assert_qmp(result, 'return[0]/io-status', 'failed') + + result = self.vm.qmp('block-job-resume', device='drive0') + self.assert_qmp(result, 'return', {}) + error = True + elif event['event'] == 'BLOCK_JOB_READY': + self.assertTrue(error, 'job completed unexpectedly') + self.assert_qmp(event, 'data/device', 'drive0') + ready = True + + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/paused', False) + self.assert_qmp(result, 'return[0]/io-status', 'ok') + + self.complete_and_wait(wait_ready=False) + self.assert_no_active_mirrors() + self.vm.shutdown() + +class TestWriteErrors(ImageMirroringTestCase): + image_len = 2 * 1024 * 1024 # MB + + # this should be a multiple of twice the default granularity + # so that we hit this offset first in state 1 + MIRROR_GRANULARITY = 1024 * 1024 + + def create_blkdebug_file(self, name, event, errno): + file = open(name, 'w') + file.write(''' +[inject-error] +state = "1" +event = "%s" +errno = "%d" +immediately = "off" +once = "on" +sector = "%d" + +[set-state] +state = "1" +event = "%s" +new_state = "2" + +[set-state] +state = "2" +event = "%s" +new_state = "1" +''' % (event, errno, self.MIRROR_GRANULARITY / 512, event, event)) + file.close() + + def setUp(self): + self.blkdebug_file = target_img + ".blkdebug" + 
self.create_image(backing_img, TestWriteErrors.image_len) + self.create_blkdebug_file(self.blkdebug_file, "write_aio", 5) + qemu_img('create', '-f', iotests.imgfmt, '-obacking_file=%s' %(backing_img), test_img) + self.vm = iotests.VM().add_drive(test_img) + self.target_img = 'blkdebug:%s:%s' % (self.blkdebug_file, target_img) + qemu_img('create', '-f', iotests.imgfmt, '-osize=%d' %(TestWriteErrors.image_len), target_img) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + os.remove(test_img) + os.remove(backing_img) + os.remove(self.blkdebug_file) + + def test_report_write(self): + self.assert_no_active_mirrors() + + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + mode='existing', target=self.target_img) + self.assert_qmp(result, 'return', {}) + + completed = False + error = False + while not completed: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_ERROR': + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/operation', 'write') + error = True + elif event['event'] == 'BLOCK_JOB_READY': + self.assertTrue(False, 'job completed unexpectedly') + elif event['event'] == 'BLOCK_JOB_COMPLETED': + self.assertTrue(error, 'job completed unexpectedly') + self.assert_qmp(event, 'data/type', 'mirror') + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/error', 'Input/output error') + self.assert_qmp(event, 'data/len', self.image_len) + completed = True + + self.assert_no_active_mirrors() + self.vm.shutdown() + + def test_ignore_write(self): + self.assert_no_active_mirrors() + + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + mode='existing', target=self.target_img, + on_target_error='ignore') + self.assert_qmp(result, 'return', {}) + + event = self.vm.get_qmp_event(wait=True) + self.assertEquals(event['event'], 'BLOCK_JOB_ERROR') + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/operation', 'write') 
+ result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/paused', False) + self.complete_and_wait() + self.vm.shutdown() + + def test_stop_write(self): + self.assert_no_active_mirrors() + + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + mode='existing', target=self.target_img, + on_target_error='stop') + self.assert_qmp(result, 'return', {}) + + error = False + ready = False + while not ready: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_ERROR': + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/operation', 'write') + + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/paused', True) + self.assert_qmp(result, 'return[0]/io-status', 'failed') + + result = self.vm.qmp('block-job-resume', device='drive0') + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/paused', False) + self.assert_qmp(result, 'return[0]/io-status', 'ok') + error = True + elif event['event'] == 'BLOCK_JOB_READY': + self.assertTrue(error, 'job completed unexpectedly') + self.assert_qmp(event, 'data/device', 'drive0') + ready = True + + self.complete_and_wait(wait_ready=False) + self.assert_no_active_mirrors() + self.vm.shutdown() + +class TestSetSpeed(ImageMirroringTestCase): + image_len = 80 * 1024 * 1024 # MB + + def setUp(self): + qemu_img('create', backing_img, str(TestSetSpeed.image_len)) + qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, test_img) + self.vm = iotests.VM().add_drive(test_img) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + os.remove(test_img) + os.remove(backing_img) + os.remove(target_img) + + def test_set_speed(self): + self.assert_no_active_mirrors() + + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + target=target_img) + self.assert_qmp(result, 'return', {}) + + # Default speed is 0 + result = 
self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/device', 'drive0') + self.assert_qmp(result, 'return[0]/speed', 0) + + result = self.vm.qmp('block-job-set-speed', device='drive0', speed=8 * 1024 * 1024) + self.assert_qmp(result, 'return', {}) + + # Ensure the speed we set was accepted + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/device', 'drive0') + self.assert_qmp(result, 'return[0]/speed', 8 * 1024 * 1024) + + self.cancel_and_wait() + + # Check setting speed in drive-mirror works + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + target=target_img, speed=4*1024*1024) + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/device', 'drive0') + self.assert_qmp(result, 'return[0]/speed', 4 * 1024 * 1024) + + self.cancel_and_wait() + + def test_set_speed_invalid(self): + self.assert_no_active_mirrors() + + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + target=target_img, speed=-1) + self.assert_qmp(result, 'error/class', 'GenericError') + + self.assert_no_active_mirrors() + + result = self.vm.qmp('drive-mirror', device='drive0', sync='full', + target=target_img) + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('block-job-set-speed', device='drive0', speed=-1) + self.assert_qmp(result, 'error/class', 'GenericError') + + self.cancel_and_wait() + +if __name__ == '__main__': + iotests.main(supported_fmts=['qcow2', 'qed']) diff --git a/tests/qemu-iotests/041.out b/tests/qemu-iotests/041.out new file mode 100644 index 0000000..71009c2 --- /dev/null +++ b/tests/qemu-iotests/041.out @@ -0,0 +1,5 @@ +.................. 
+---------------------------------------------------------------------- +Ran 18 tests + +OK diff --git a/tests/qemu-iotests/042 b/tests/qemu-iotests/042 new file mode 100755 index 0000000..c3c3ca8 --- /dev/null +++ b/tests/qemu-iotests/042 @@ -0,0 +1,78 @@ +#!/bin/bash +# +# Test qemu-img operation on zero size images +# +# Copyright (C) 2012 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +# creator +owner=kwolf@redhat.com + +seq=`basename $0` +echo "QA output created by $seq" + +here=`pwd` +tmp=/tmp/$$ +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. 
./common.filter + +_supported_fmt qcow2 qcow qed vmdk +_supported_proto file +_supported_os Linux + +echo +echo "== Creating zero size image ==" + +_make_test_img 0 +_check_test_img + +mv $TEST_IMG $TEST_IMG.orig + +echo +echo "== Converting the image ==" + +$QEMU_IMG convert -O $IMGFMT $TEST_IMG.orig $TEST_IMG +_check_test_img + +echo +echo "== Converting the image, compressed ==" + +if [ "$IMGFMT" == "qcow2" ]; then + $QEMU_IMG convert -c -O $IMGFMT $TEST_IMG.orig $TEST_IMG +fi +_check_test_img + +echo +echo "== Rebasing the image ==" + +$QEMU_IMG rebase -u -b $TEST_IMG.orig $TEST_IMG +$QEMU_IMG rebase -b $TEST_IMG.orig $TEST_IMG +_check_test_img + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 + diff --git a/tests/qemu-iotests/042.out b/tests/qemu-iotests/042.out new file mode 100644 index 0000000..dc80f4b --- /dev/null +++ b/tests/qemu-iotests/042.out @@ -0,0 +1,15 @@ +QA output created by 042 + +== Creating zero size image == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=0 +No errors were found on the image. + +== Converting the image == +No errors were found on the image. + +== Converting the image, compressed == +No errors were found on the image. + +== Rebasing the image == +No errors were found on the image. +*** done diff --git a/tests/qemu-iotests/043 b/tests/qemu-iotests/043 new file mode 100755 index 0000000..3ba08dc --- /dev/null +++ b/tests/qemu-iotests/043 @@ -0,0 +1,95 @@ +#!/bin/bash +# +# Test that qemu-img info --backing-chain detects infinite loops +# +# Copyright (C) 2012 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +# creator +owner=stefanha@redhat.com + +seq=`basename $0` +echo "QA output created by $seq" + +here=`pwd` +tmp=/tmp/$$ +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img + rm -f $TEST_IMG.[123].base +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +# Any format supporting backing files +_supported_fmt qcow qcow2 vmdk qed +_supported_proto generic +_supported_os Linux + + +size=128M +_make_test_img $size +$QEMU_IMG rebase -u -b $TEST_IMG $TEST_IMG + +echo +echo "== backing file references self ==" +_img_info --backing-chain + +_make_test_img $size +mv $TEST_IMG $TEST_IMG.base +_make_test_img -b $TEST_IMG.base $size +$QEMU_IMG rebase -u -b $TEST_IMG $TEST_IMG.base + +echo +echo "== parent references self ==" +_img_info --backing-chain + +_make_test_img $size +mv $TEST_IMG $TEST_IMG.1.base +_make_test_img -b $TEST_IMG.1.base $size +mv $TEST_IMG $TEST_IMG.2.base +_make_test_img -b $TEST_IMG.2.base $size +mv $TEST_IMG $TEST_IMG.3.base +_make_test_img -b $TEST_IMG.3.base $size +$QEMU_IMG rebase -u -b $TEST_IMG.2.base $TEST_IMG.1.base + +echo +echo "== ancestor references another ancestor ==" +_img_info --backing-chain + +_make_test_img $size +mv $TEST_IMG $TEST_IMG.1.base +_make_test_img -b $TEST_IMG.1.base $size +mv $TEST_IMG $TEST_IMG.2.base +_make_test_img -b $TEST_IMG.2.base $size + +echo +echo "== finite chain of length 3 (human) ==" +_img_info --backing-chain + +echo +echo "== finite chain of length 3 (json) ==" +_img_info --backing-chain --output=json + +# success, all 
done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/043.out b/tests/qemu-iotests/043.out new file mode 100644 index 0000000..ad23337 --- /dev/null +++ b/tests/qemu-iotests/043.out @@ -0,0 +1,66 @@ +QA output created by 043 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 + +== backing file references self == +qemu-img: Backing file 'TEST_DIR/t.IMGFMT' creates an infinite loop. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file='TEST_DIR/t.IMGFMT.base' + +== parent references self == +qemu-img: Backing file 'TEST_DIR/t.IMGFMT' creates an infinite loop. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file='TEST_DIR/t.IMGFMT.1.base' +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file='TEST_DIR/t.IMGFMT.2.base' +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file='TEST_DIR/t.IMGFMT.3.base' + +== ancestor references another ancestor == +qemu-img: Backing file 'TEST_DIR/t.IMGFMT.2.base' creates an infinite loop. 
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file='TEST_DIR/t.IMGFMT.1.base' +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file='TEST_DIR/t.IMGFMT.2.base' + +== finite chain of length 3 (human) == +image: TEST_DIR/t.IMGFMT +file format: IMGFMT +virtual size: 128M (134217728 bytes) +cluster_size: 65536 +backing file: TEST_DIR/t.IMGFMT.2.base + +image: TEST_DIR/t.IMGFMT.2.base +file format: IMGFMT +virtual size: 128M (134217728 bytes) +cluster_size: 65536 +backing file: TEST_DIR/t.IMGFMT.1.base + +image: TEST_DIR/t.IMGFMT.1.base +file format: IMGFMT +virtual size: 128M (134217728 bytes) +cluster_size: 65536 + +== finite chain of length 3 (json) == +[ + { + "virtual-size": 134217728, + "filename": "TEST_DIR/t.IMGFMT", + "cluster-size": 65536, + "format": "IMGFMT", + "backing-filename": "TEST_DIR/t.IMGFMT.2.base", + "dirty-flag": false + }, + { + "virtual-size": 134217728, + "filename": "TEST_DIR/t.IMGFMT.2.base", + "cluster-size": 65536, + "format": "IMGFMT", + "backing-filename": "TEST_DIR/t.IMGFMT.1.base", + "dirty-flag": false + }, + { + "virtual-size": 134217728, + "filename": "TEST_DIR/t.IMGFMT.1.base", + "cluster-size": 65536, + "format": "IMGFMT", + "dirty-flag": false + } +] +*** done diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index d534e94..334534f 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -145,6 +145,16 @@ _check_test_img() sed -e 's/qemu-img\: This image format does not support checks/No errors were found on the image./' } +_img_info() +{ + $QEMU_IMG info "$@" $TEST_IMG 2>&1 | \ + sed -e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" \ + -e "s#$TEST_DIR#TEST_DIR#g" \ + -e "s#$IMGFMT#IMGFMT#g" \ + -e "/^disk size:/ D" \ + -e "/actual-size/ D" +} + _get_pids_by_name() { if [ $# -ne 1 ] diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 66d2ba9..ac86f54 100644 --- 
a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -47,3 +47,6 @@ 038 rw auto backing 039 rw auto 040 rw auto +041 rw auto backing +042 rw auto quick +043 rw auto backing diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index 3c60b2d..735c674 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -106,6 +106,10 @@ class VM(object): return self._qmp.cmd(cmd, args=qmp_args) + def get_qmp_event(self, wait=False): + '''Poll for one queued QMP event and return it''' + return self._qmp.pull_event(wait=wait) + def get_qmp_events(self, wait=False): '''Poll for queued QMP events and return a list of dicts''' events = self._qmp.get_events(wait=wait) diff --git a/trace-events b/trace-events index e2d4580..09b5d55 100644 --- a/trace-events +++ b/trace-events @@ -77,10 +77,18 @@ stream_start(void *bs, void *base, void *s, void *co, void *opaque) "bs %p base commit_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d" commit_start(void *bs, void *base, void *top, void *s, void *co, void *opaque) "bs %p base %p top %p s %p co %p opaque %p" +# block/mirror.c +mirror_start(void *bs, void *s, void *co, void *opaque) "bs %p s %p co %p opaque %p" +mirror_before_flush(void *s) "s %p" +mirror_before_drain(void *s, int64_t cnt) "s %p dirty count %"PRId64 +mirror_before_sleep(void *s, int64_t cnt, int synced) "s %p dirty count %"PRId64" synced %d" +mirror_one_iteration(void *s, int64_t sector_num, int nb_sectors) "s %p sector_num %"PRId64" nb_sectors %d" + # blockdev.c qmp_block_job_cancel(void *job) "job %p" qmp_block_job_pause(void *job) "job %p" qmp_block_job_resume(void *job) "job %p" +qmp_block_job_complete(void *job) "job %p" block_job_cb(void *bs, void *job, int ret) "bs %p job %p ret %d" qmp_block_stream(void *bs, void *job) "bs %p job %p" @@ -800,6 +800,78 @@ bool usb_enabled(bool default_usb) return default_usb; } +#ifndef _WIN32
+static int parse_add_fd(QemuOpts *opts, void *opaque) /* 0 on success, -1 after reporting an error */
+{
+    int fd, dupfd, flags;
+    int64_t fdset_id;
+    const char *fd_opaque = NULL;
+
+    fd = qemu_opt_get_number(opts, "fd", -1);
+    fdset_id = qemu_opt_get_number(opts, "set", -1);
+    fd_opaque = qemu_opt_get(opts, "opaque");
+
+    if (fd < 0) {
+        qerror_report(ERROR_CLASS_GENERIC_ERROR,
+                      "fd option is required and must be non-negative");
+        return -1;
+    }
+
+    if (fd <= STDERR_FILENO) {
+        qerror_report(ERROR_CLASS_GENERIC_ERROR,
+                      "fd cannot be a standard I/O stream");
+        return -1;
+    }
+
+    /*
+     * All fds inherited across exec() necessarily have FD_CLOEXEC
+     * clear, while qemu sets FD_CLOEXEC on all other fds used internally.
+     */
+    flags = fcntl(fd, F_GETFD);
+    if (flags == -1 || (flags & FD_CLOEXEC)) {
+        qerror_report(ERROR_CLASS_GENERIC_ERROR,
+                      "fd is not valid or already in use");
+        return -1;
+    }
+
+    if (fdset_id < 0) {
+        qerror_report(ERROR_CLASS_GENERIC_ERROR,
+                      "set option is required and must be non-negative");
+        return -1;
+    }
+
+#ifdef F_DUPFD_CLOEXEC
+    dupfd = fcntl(fd, F_DUPFD_CLOEXEC, 0); /* duplicate with FD_CLOEXEC set in one step */
+#else
+    dupfd = dup(fd);
+    if (dupfd != -1) {
+        qemu_set_cloexec(dupfd);
+    }
+#endif
+    if (dupfd == -1) {
+        qerror_report(ERROR_CLASS_GENERIC_ERROR,
+                      "Error duplicating fd: %s", strerror(errno));
+        return -1;
+    }
+
+    /* add the duplicate fd, and optionally the opaque string, to the fd set */
+    monitor_fdset_add_fd(dupfd, true, fdset_id, fd_opaque ? true : false,
+                         fd_opaque, NULL);
+
+    return 0;
+}
+
+static int cleanup_add_fd(QemuOpts *opts, void *opaque) /* closes the fd named by the "fd" option */
+{
+    int fd;
+
+    fd = qemu_opt_get_number(opts, "fd", -1);
+    close(fd); /* only the original is closed; parse_add_fd() handed a duplicate to the fd set */
+
+    return 0;
+}
+#endif
+
 /***********************************************************/
 /* QEMU Block devices */
 
@@ -3327,6 +3399,18 @@ int main(int argc, char **argv, char **envp)
                     exit(0);
                 }
                 break;
+            case QEMU_OPTION_add_fd:
+#ifndef _WIN32
+                opts = qemu_opts_parse(qemu_find_opts("add-fd"), optarg, 0);
+                if (!opts) {
+                    exit(1); /* a rejected -add-fd argument is a failure, not a clean exit */
+                }
+#else
+                error_report("File descriptor passing is disabled on this "
+                             "platform");
+                exit(1);
+#endif
+                break;
             default:
                 os_parse_cmd_args(popt->index, optarg);
             }
@@ -3338,6 +3422,16 @@ int main(int argc, char **argv, char **envp)
         exit(1);
     }
 
+#ifndef _WIN32
+    if (qemu_opts_foreach(qemu_find_opts("add-fd"), parse_add_fd, NULL, 1)) { /* validate and duplicate every fd first */
+        exit(1);
+    }
+
+    if (qemu_opts_foreach(qemu_find_opts("add-fd"), cleanup_add_fd, NULL, 1)) { /* then close the originals */
+        exit(1);
+    }
+#endif
+
     if (machine == NULL) {
         fprintf(stderr, "No machine found.\n");
         exit(1);
|