diff options
author | Anthony Liguori <aliguori@amazon.com> | 2014-01-24 15:43:30 -0800 |
---|---|---|
committer | Anthony Liguori <aliguori@amazon.com> | 2014-01-24 15:43:30 -0800 |
commit | 0d688cf7d8d71bce2aab83173552a784e96b6729 (patch) | |
tree | 543f7cc92a36e0157cf35e474ee4eb74b8f6156e | |
parent | 732c66ce641c69702a7e7fdb73b68f0c1b583ab5 (diff) | |
parent | d5103588aa39157c8eea3bb5fb6780bbd8be21b7 (diff) | |
download | qemu-0d688cf7d8d71bce2aab83173552a784e96b6729.zip qemu-0d688cf7d8d71bce2aab83173552a784e96b6729.tar.gz qemu-0d688cf7d8d71bce2aab83173552a784e96b6729.tar.bz2 |
Merge remote-tracking branch 'kwolf/tags/for-anthony' into staging
Block patches
# gpg: Signature made Fri 24 Jan 2014 08:40:53 AM PST using RSA key ID C88F2FD6
# gpg: Can't check signature: public key not found
* kwolf/tags/for-anthony: (93 commits)
block: Switch bdrv_io_limits_intercept() to byte granularity
qemu-iotests: Test pwritev RMW logic
qemu-io: New command 'sleep'
blkdebug: Make required alignment configurable
iscsi: Set bs->request_alignment
block: Make bdrv_pwrite() a bdrv_prwv_co() wrapper
block: Make bdrv_pread() a bdrv_prwv_co() wrapper
block: Change coroutine wrapper to byte granularity
block: Assert serialisation assumptions in pwritev
block: Align requests in bdrv_co_do_pwritev()
block: Allow wait_serialising_requests() at any point
block: Make overlap range for serialisation dynamic
block: Generalise and optimise COR serialisation
block: Make zero-after-EOF work with larger alignment
block: Allow waiting for overlapping requests between begin/end
block: Switch BdrvTrackedRequest to byte granularity
block: Introduce bdrv_co_do_pwritev()
block: write: Handle COR dependency after I/O throttling
block: Introduce bdrv_aligned_pwritev()
block: Introduce bdrv_co_do_preadv()
...
Message-id: 1390584136-24703-1-git-send-email-kwolf@redhat.com
Signed-off-by: Anthony Liguori <aliguori@amazon.com>
77 files changed, 3579 insertions, 885 deletions
diff --git a/Makefile.objs b/Makefile.objs index 857bb53..ac1d0e1 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -43,7 +43,6 @@ libcacard-y += libcacard/vcardt.o ifeq ($(CONFIG_SOFTMMU),y) common-obj-y = $(block-obj-y) blockdev.o blockdev-nbd.o block/ common-obj-y += net/ -common-obj-y += readline.o common-obj-y += qdev-monitor.o device-hotplug.o common-obj-$(CONFIG_WIN32) += os-win32.o common-obj-$(CONFIG_POSIX) += os-posix.o @@ -32,6 +32,7 @@ #include "sysemu/sysemu.h" #include "qemu/notify.h" #include "block/coroutine.h" +#include "block/qapi.h" #include "qmp-commands.h" #include "qemu/timer.h" @@ -69,11 +70,11 @@ static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs, static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *iov); -static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, +static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs, + int64_t offset, unsigned int bytes, QEMUIOVector *qiov, BdrvRequestFlags flags); -static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, +static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs, + int64_t offset, unsigned int bytes, QEMUIOVector *qiov, BdrvRequestFlags flags); static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs, int64_t sector_num, @@ -90,6 +91,9 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, static QTAILQ_HEAD(, BlockDriverState) bdrv_states = QTAILQ_HEAD_INITIALIZER(bdrv_states); +static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states = + QTAILQ_HEAD_INITIALIZER(graph_bdrv_states); + static QLIST_HEAD(, BlockDriver) bdrv_drivers = QLIST_HEAD_INITIALIZER(bdrv_drivers); @@ -188,7 +192,7 @@ void bdrv_io_limits_enable(BlockDriverState *bs) * @is_write: is the IO a write */ static void bdrv_io_limits_intercept(BlockDriverState *bs, - int nb_sectors, + unsigned int bytes, bool is_write) { /* does this io must wait */ @@ -201,9 +205,8 @@ static void bdrv_io_limits_intercept(BlockDriverState *bs, } /* the IO will be executed, do the accounting */ - throttle_account(&bs->throttle_state, - is_write, - nb_sectors * BDRV_SECTOR_SIZE); + throttle_account(&bs->throttle_state, is_write, bytes); + /* if the next request must wait -> do nothing */ if (throttle_schedule_timer(&bs->throttle_state, is_write)) { @@ -214,6 +217,16 @@ static void bdrv_io_limits_intercept(BlockDriverState *bs, qemu_co_queue_next(&bs->throttled_reqs[is_write]); } +size_t bdrv_opt_mem_align(BlockDriverState *bs) +{ + if (!bs || !bs->drv) { + /* 4k should be on the safe side */ + return 4096; + } + + return bs->bl.opt_mem_alignment; +} + /* check if the path starts with "<protocol>:" */ static int path_has_protocol(const char *path) { @@ -327,7 +340,7 @@ BlockDriverState *bdrv_new(const char *device_name) QLIST_INIT(&bs->dirty_bitmaps); pstrcpy(bs->device_name, sizeof(bs->device_name), device_name); if (device_name[0] != '\0') { - QTAILQ_INSERT_TAIL(&bdrv_states, bs, list); + QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list); } bdrv_iostatus_disable(bs); notifier_list_init(&bs->close_notifiers); @@ -479,6 +492,43 @@ int bdrv_create_file(const char* filename, QEMUOptionParameter *options, return ret; } +int bdrv_refresh_limits(BlockDriverState *bs) +{ + BlockDriver *drv = bs->drv; + + memset(&bs->bl, 0, sizeof(bs->bl)); + + if (!drv) { + return 0; + } + + /* Take some limits from the children as a default */ + if (bs->file) { + bdrv_refresh_limits(bs->file); + bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length; + bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment; + } else { + bs->bl.opt_mem_alignment = 512; + } + + if (bs->backing_hd) { + bdrv_refresh_limits(bs->backing_hd); + bs->bl.opt_transfer_length = + MAX(bs->bl.opt_transfer_length, + bs->backing_hd->bl.opt_transfer_length); + bs->bl.opt_mem_alignment = + MAX(bs->bl.opt_mem_alignment, + bs->backing_hd->bl.opt_mem_alignment); + } + + /* Then let the driver override it */ + if (drv->bdrv_refresh_limits) { + return drv->bdrv_refresh_limits(bs); + } + + return 0; +} + /* * Create a uniquely-named empty temporary file. * Return 0 upon success, otherwise a negative errno value. @@ -732,6 +782,33 @@ static int bdrv_open_flags(BlockDriverState *bs, int flags) return open_flags; } +static int bdrv_assign_node_name(BlockDriverState *bs, + const char *node_name, + Error **errp) +{ + if (!node_name) { + return 0; + } + + /* empty string node name is invalid */ + if (node_name[0] == '\0') { + error_setg(errp, "Empty node name"); + return -EINVAL; + } + + /* takes care of avoiding duplicates node names */ + if (bdrv_find_node(node_name)) { + error_setg(errp, "Duplicate node name"); + return -EINVAL; + } + + /* copy node name into the bs and insert it into the graph list */ + pstrcpy(bs->node_name, sizeof(bs->node_name), node_name); + QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list); + + return 0; +} + /* * Common part for opening disk images and files * @@ -742,6 +819,7 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, { int ret, open_flags; const char *filename; + const char *node_name = NULL; Error *local_err = NULL; assert(drv != NULL); @@ -756,6 +834,13 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name); + node_name = qdict_get_try_str(options, "node-name"); + ret = bdrv_assign_node_name(bs, node_name, errp); + if (ret < 0) { + return ret; + } + qdict_del(options, "node-name"); + /* bdrv_open() with directly using a protocol as drv. This layer is already * opened, so assign it to bs (while file becomes a closed BlockDriverState) * and return immediately. */ @@ -765,7 +850,8 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, } bs->open_flags = flags; - bs->buffer_alignment = 512; + bs->guest_block_size = 512; + bs->request_alignment = 512; bs->zero_beyond_eof = true; open_flags = bdrv_open_flags(bs, flags); bs->read_only = !(open_flags & BDRV_O_RDWR); @@ -833,6 +919,10 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, goto free_and_fail; } + bdrv_refresh_limits(bs); + assert(bdrv_opt_mem_align(bs) != 0); + assert(bs->request_alignment != 0); + #ifndef _WIN32 if (bs->is_temporary) { assert(bs->filename[0] != '\0'); @@ -858,9 +948,10 @@ free_and_fail: * dictionary, it needs to use QINCREF() before calling bdrv_file_open. */ int bdrv_file_open(BlockDriverState **pbs, const char *filename, - QDict *options, int flags, Error **errp) + const char *reference, QDict *options, int flags, + Error **errp) { - BlockDriverState *bs; + BlockDriverState *bs = NULL; BlockDriver *drv; const char *drvname; bool allow_protocol_prefix = false; @@ -872,6 +963,24 @@ int bdrv_file_open(BlockDriverState **pbs, const char *filename, options = qdict_new(); } + if (reference) { + if (filename || qdict_size(options)) { + error_setg(errp, "Cannot reference an existing block device with " + "additional options or a new filename"); + return -EINVAL; + } + QDECREF(options); + + bs = bdrv_find(reference); + if (!bs) { + error_setg(errp, "Cannot find block device '%s'", reference); + return -ENODEV; + } + bdrv_ref(bs); + *pbs = bs; + return 0; + } + bs = bdrv_new(""); bs->options = options; options = qdict_clone_shallow(options); @@ -929,14 +1038,19 @@ int bdrv_file_open(BlockDriverState **pbs, const char *filename, goto fail; } - ret = bdrv_open_common(bs, NULL, options, flags, drv, &local_err); + if (!drv->bdrv_file_open) { + ret = bdrv_open(bs, filename, options, flags, drv, &local_err); + options = NULL; + } else { + ret = bdrv_open_common(bs, NULL, options, flags, drv, &local_err); + } if (ret < 0) { error_propagate(errp, local_err); goto fail; } /* Check if any unknown options were used */ - if (qdict_size(options) != 0) { + if (options && (qdict_size(options) != 0)) { const QDictEntry *entry = qdict_first(options); error_setg(errp, "Block protocol '%s' doesn't support the option '%s'", drv->format_name, entry->key); @@ -1016,12 +1130,92 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp) error_free(local_err); return ret; } - pstrcpy(bs->backing_file, sizeof(bs->backing_file), - bs->backing_hd->file->filename); + + if (bs->backing_hd->file) { + pstrcpy(bs->backing_file, sizeof(bs->backing_file), + bs->backing_hd->file->filename); + } + + /* Recalculate the BlockLimits with the backing file */ + bdrv_refresh_limits(bs); + return 0; } /* + * Opens a disk image whose options are given as BlockdevRef in another block + * device's options. + * + * If force_raw is true, bdrv_file_open() will be used, thereby preventing any + * image format auto-detection. If it is false and a filename is given, + * bdrv_open() will be used for auto-detection. + * + * If allow_none is true, no image will be opened if filename is false and no + * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned. + * + * bdrev_key specifies the key for the image's BlockdevRef in the options QDict. + * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict + * itself, all options starting with "${bdref_key}." are considered part of the + * BlockdevRef. + * + * The BlockdevRef will be removed from the options QDict. + */ +int bdrv_open_image(BlockDriverState **pbs, const char *filename, + QDict *options, const char *bdref_key, int flags, + bool force_raw, bool allow_none, Error **errp) +{ + QDict *image_options; + int ret; + char *bdref_key_dot; + const char *reference; + + bdref_key_dot = g_strdup_printf("%s.", bdref_key); + qdict_extract_subqdict(options, &image_options, bdref_key_dot); + g_free(bdref_key_dot); + + reference = qdict_get_try_str(options, bdref_key); + if (!filename && !reference && !qdict_size(image_options)) { + if (allow_none) { + ret = 0; + } else { + error_setg(errp, "A block device must be specified for \"%s\"", + bdref_key); + ret = -EINVAL; + } + goto done; + } + + if (filename && !force_raw) { + /* If a filename is given and the block driver should be detected + automatically (instead of using none), use bdrv_open() in order to do + that auto-detection. */ + BlockDriverState *bs; + + if (reference) { + error_setg(errp, "Cannot reference an existing block device while " + "giving a filename"); + ret = -EINVAL; + goto done; + } + + bs = bdrv_new(""); + ret = bdrv_open(bs, filename, image_options, flags, NULL, errp); + if (ret < 0) { + bdrv_unref(bs); + } else { + *pbs = bs; + } + } else { + ret = bdrv_file_open(pbs, filename, reference, image_options, flags, + errp); + } + +done: + qdict_del(options, bdref_key); + return ret; +} + +/* * Opens a disk image (raw, qcow2, vmdk, ...) * * options is a QDict of options to pass to the block drivers, or NULL for an @@ -1036,7 +1230,6 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options, /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */ char tmp_filename[PATH_MAX + 1]; BlockDriverState *file = NULL; - QDict *file_options = NULL; const char *drvname; Error *local_err = NULL; @@ -1122,10 +1315,9 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options, flags |= BDRV_O_ALLOW_RDWR; } - qdict_extract_subqdict(options, &file_options, "file."); - - ret = bdrv_file_open(&file, filename, file_options, - bdrv_open_flags(bs, flags | BDRV_O_UNMAP), &local_err); + ret = bdrv_open_image(&file, filename, options, "file", + bdrv_open_flags(bs, flags | BDRV_O_UNMAP), true, true, + &local_err); if (ret < 0) { goto fail; } @@ -1143,7 +1335,13 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options, } if (!drv) { - ret = find_image_format(file, filename, &drv, &local_err); + if (file) { + ret = find_image_format(file, filename, &drv, &local_err); + } else { + error_setg(errp, "Must specify either driver or file"); + ret = -EINVAL; + goto unlink_and_fail; + } } if (!drv) { @@ -1156,7 +1354,7 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options, goto unlink_and_fail; } - if (bs->file != file) { + if (file && (bs->file != file)) { bdrv_unref(file); file = NULL; } @@ -1427,6 +1625,8 @@ void bdrv_reopen_commit(BDRVReopenState *reopen_state) reopen_state->bs->enable_write_cache = !!(reopen_state->flags & BDRV_O_CACHE_WB); reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR); + + bdrv_refresh_limits(reopen_state->bs); } /* @@ -1501,7 +1701,7 @@ void bdrv_close_all(void) { BlockDriverState *bs; - QTAILQ_FOREACH(bs, &bdrv_states, list) { + QTAILQ_FOREACH(bs, &bdrv_states, device_list) { bdrv_close(bs); } } @@ -1530,7 +1730,7 @@ static bool bdrv_requests_pending(BlockDriverState *bs) static bool bdrv_requests_pending_all(void) { BlockDriverState *bs; - QTAILQ_FOREACH(bs, &bdrv_states, list) { + QTAILQ_FOREACH(bs, &bdrv_states, device_list) { if (bdrv_requests_pending(bs)) { return true; } @@ -1557,7 +1757,7 @@ void bdrv_drain_all(void) BlockDriverState *bs; while (busy) { - QTAILQ_FOREACH(bs, &bdrv_states, list) { + QTAILQ_FOREACH(bs, &bdrv_states, device_list) { bdrv_start_throttled_reqs(bs); } @@ -1566,14 +1766,19 @@ void bdrv_drain_all(void) } } -/* make a BlockDriverState anonymous by removing from bdrv_state list. +/* make a BlockDriverState anonymous by removing from bdrv_state and + * graph_bdrv_state list. Also, NULL terminate the device_name to prevent double remove */ void bdrv_make_anon(BlockDriverState *bs) { if (bs->device_name[0] != '\0') { - QTAILQ_REMOVE(&bdrv_states, bs, list); + QTAILQ_REMOVE(&bdrv_states, bs, device_list); } bs->device_name[0] = '\0'; + if (bs->node_name[0] != '\0') { + QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list); + } + bs->node_name[0] = '\0'; } static void bdrv_rebind(BlockDriverState *bs) @@ -1593,7 +1798,7 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest, bs_dest->dev_ops = bs_src->dev_ops; bs_dest->dev_opaque = bs_src->dev_opaque; bs_dest->dev = bs_src->dev; - bs_dest->buffer_alignment = bs_src->buffer_alignment; + bs_dest->guest_block_size = bs_src->guest_block_size; bs_dest->copy_on_read = bs_src->copy_on_read; bs_dest->enable_write_cache = bs_src->enable_write_cache; @@ -1627,7 +1832,12 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest, /* keep the same entry in bdrv_states */ pstrcpy(bs_dest->device_name, sizeof(bs_dest->device_name), bs_src->device_name); - bs_dest->list = bs_src->list; + bs_dest->device_list = bs_src->device_list; + + /* keep the same entry in graph_bdrv_states + * We do want to swap name but don't want to swap linked list entries + */ + bs_dest->node_list = bs_src->node_list; } /* @@ -1745,7 +1955,7 @@ void bdrv_detach_dev(BlockDriverState *bs, void *dev) bs->dev = NULL; bs->dev_ops = NULL; bs->dev_opaque = NULL; - bs->buffer_alignment = 512; + bs->guest_block_size = 512; } /* TODO change to return DeviceState * when all users are qdevified */ @@ -1876,10 +2086,10 @@ int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) int bdrv_commit(BlockDriverState *bs) { BlockDriver *drv = bs->drv; - int64_t sector, total_sectors; + int64_t sector, total_sectors, length, backing_length; int n, ro, open_flags; int ret = 0; - uint8_t *buf; + uint8_t *buf = NULL; char filename[PATH_MAX]; if (!drv) @@ -1904,7 +2114,29 @@ int bdrv_commit(BlockDriverState *bs) } } - total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS; + length = bdrv_getlength(bs); + if (length < 0) { + ret = length; + goto ro_cleanup; + } + + backing_length = bdrv_getlength(bs->backing_hd); + if (backing_length < 0) { + ret = backing_length; + goto ro_cleanup; + } + + /* If our top snapshot is larger than the backing file image, + * grow the backing file image if possible. If not possible, + * we must return an error */ + if (length > backing_length) { + ret = bdrv_truncate(bs->backing_hd, length); + if (ret < 0) { + goto ro_cleanup; + } + } + + total_sectors = length >> BDRV_SECTOR_BITS; buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE); for (sector = 0; sector < total_sectors; sector += n) { @@ -1913,13 +2145,13 @@ int bdrv_commit(BlockDriverState *bs) goto ro_cleanup; } if (ret) { - if (bdrv_read(bs, sector, buf, n) != 0) { - ret = -EIO; + ret = bdrv_read(bs, sector, buf, n); + if (ret < 0) { goto ro_cleanup; } - if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) { - ret = -EIO; + ret = bdrv_write(bs->backing_hd, sector, buf, n); + if (ret < 0) { goto ro_cleanup; } } @@ -1927,6 +2159,9 @@ int bdrv_commit(BlockDriverState *bs) if (drv->bdrv_make_empty) { ret = drv->bdrv_make_empty(bs); + if (ret < 0) { + goto ro_cleanup; + } bdrv_flush(bs); } @@ -1934,9 +2169,11 @@ int bdrv_commit(BlockDriverState *bs) * Make sure all data we wrote to the backing device is actually * stable on disk. */ - if (bs->backing_hd) + if (bs->backing_hd) { bdrv_flush(bs->backing_hd); + } + ret = 0; ro_cleanup: g_free(buf); @@ -1952,7 +2189,7 @@ int bdrv_commit_all(void) { BlockDriverState *bs; - QTAILQ_FOREACH(bs, &bdrv_states, list) { + QTAILQ_FOREACH(bs, &bdrv_states, device_list) { if (bs->drv && bs->backing_hd) { int ret = bdrv_commit(bs); if (ret < 0) { @@ -1970,6 +2207,10 @@ int bdrv_commit_all(void) */ static void tracked_request_end(BdrvTrackedRequest *req) { + if (req->serialising) { + req->bs->serialising_in_flight--; + } + QLIST_REMOVE(req, list); qemu_co_queue_restart_all(&req->wait_queue); } @@ -1979,15 +2220,18 @@ static void tracked_request_end(BdrvTrackedRequest *req) */ static void tracked_request_begin(BdrvTrackedRequest *req, BlockDriverState *bs, - int64_t sector_num, - int nb_sectors, bool is_write) + int64_t offset, + unsigned int bytes, bool is_write) { *req = (BdrvTrackedRequest){ .bs = bs, - .sector_num = sector_num, - .nb_sectors = nb_sectors, - .is_write = is_write, - .co = qemu_coroutine_self(), + .offset = offset, + .bytes = bytes, + .is_write = is_write, + .co = qemu_coroutine_self(), + .serialising = false, + .overlap_offset = offset, + .overlap_bytes = bytes, }; qemu_co_queue_init(&req->wait_queue); @@ -1995,6 +2239,21 @@ static void tracked_request_begin(BdrvTrackedRequest *req, QLIST_INSERT_HEAD(&bs->tracked_requests, req, list); } +static void mark_request_serialising(BdrvTrackedRequest *req, size_t align) +{ + int64_t overlap_offset = req->offset & ~(align - 1); + int overlap_bytes = ROUND_UP(req->offset + req->bytes, align) + - overlap_offset; + + if (!req->serialising) { + req->bs->serialising_in_flight++; + req->serialising = true; + } + + req->overlap_offset = MIN(req->overlap_offset, overlap_offset); + req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes); +} + /** * Round a region to cluster boundaries */ @@ -2016,53 +2275,75 @@ void bdrv_round_to_clusters(BlockDriverState *bs, } } +static int bdrv_get_cluster_size(BlockDriverState *bs) +{ + BlockDriverInfo bdi; + int ret; + + ret = bdrv_get_info(bs, &bdi); + if (ret < 0 || bdi.cluster_size == 0) { + return bs->request_alignment; + } else { + return bdi.cluster_size; + } +} + static bool tracked_request_overlaps(BdrvTrackedRequest *req, - int64_t sector_num, int nb_sectors) { + int64_t offset, unsigned int bytes) +{ /* aaaa bbbb */ - if (sector_num >= req->sector_num + req->nb_sectors) { + if (offset >= req->overlap_offset + req->overlap_bytes) { return false; } /* bbbb aaaa */ - if (req->sector_num >= sector_num + nb_sectors) { + if (req->overlap_offset >= offset + bytes) { return false; } return true; } -static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs, - int64_t sector_num, int nb_sectors) +static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self) { + BlockDriverState *bs = self->bs; BdrvTrackedRequest *req; - int64_t cluster_sector_num; - int cluster_nb_sectors; bool retry; + bool waited = false; - /* If we touch the same cluster it counts as an overlap. This guarantees - * that allocating writes will be serialized and not race with each other - * for the same cluster. For example, in copy-on-read it ensures that the - * CoR read and write operations are atomic and guest writes cannot - * interleave between them. - */ - bdrv_round_to_clusters(bs, sector_num, nb_sectors, - &cluster_sector_num, &cluster_nb_sectors); + if (!bs->serialising_in_flight) { + return false; + } do { retry = false; QLIST_FOREACH(req, &bs->tracked_requests, list) { - if (tracked_request_overlaps(req, cluster_sector_num, - cluster_nb_sectors)) { + if (req == self || (!req->serialising && !self->serialising)) { + continue; + } + if (tracked_request_overlaps(req, self->overlap_offset, + self->overlap_bytes)) + { /* Hitting this means there was a reentrant request, for * example, a block driver issuing nested requests. This must * never happen since it means deadlock. */ assert(qemu_coroutine_self() != req->co); - qemu_co_queue_wait(&req->wait_queue); - retry = true; - break; + /* If the request is already (indirectly) waiting for us, or + * will wait for us as soon as it wakes up, then just go on + * (instead of producing a deadlock in the former case). */ + if (!req->waiting_for) { + self->waiting_for = req; + qemu_co_queue_wait(&req->wait_queue); + self->waiting_for = NULL; + retry = true; + waited = true; + break; + } } } } while (retry); + + return waited; } /* @@ -2224,6 +2505,7 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, } new_top_bs->backing_hd = base_bs; + bdrv_refresh_limits(new_top_bs); QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) { /* so that bdrv_close() does not recursively close the chain */ @@ -2271,8 +2553,7 @@ static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num, typedef struct RwCo { BlockDriverState *bs; - int64_t sector_num; - int nb_sectors; + int64_t offset; QEMUIOVector *qiov; bool is_write; int ret; @@ -2284,34 +2565,32 @@ static void coroutine_fn bdrv_rw_co_entry(void *opaque) RwCo *rwco = opaque; if (!rwco->is_write) { - rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num, - rwco->nb_sectors, rwco->qiov, - rwco->flags); - } else { - rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num, - rwco->nb_sectors, rwco->qiov, + rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset, + rwco->qiov->size, rwco->qiov, rwco->flags); + } else { + rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset, + rwco->qiov->size, rwco->qiov, + rwco->flags); } } /* * Process a vectored synchronous request using coroutines */ -static int bdrv_rwv_co(BlockDriverState *bs, int64_t sector_num, - QEMUIOVector *qiov, bool is_write, - BdrvRequestFlags flags) +static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset, + QEMUIOVector *qiov, bool is_write, + BdrvRequestFlags flags) { Coroutine *co; RwCo rwco = { .bs = bs, - .sector_num = sector_num, - .nb_sectors = qiov->size >> BDRV_SECTOR_BITS, + .offset = offset, .qiov = qiov, .is_write = is_write, .ret = NOT_DONE, .flags = flags, }; - assert((qiov->size & (BDRV_SECTOR_SIZE - 1)) == 0); /** * In sync call context, when the vcpu is blocked, this throttling timer @@ -2350,7 +2629,8 @@ static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, }; qemu_iovec_init_external(&qiov, &iov, 1); - return bdrv_rwv_co(bs, sector_num, &qiov, is_write, flags); + return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS, + &qiov, is_write, flags); } /* return < 0 if error. See bdrv_write() for the return codes */ @@ -2386,11 +2666,6 @@ int bdrv_write(BlockDriverState *bs, int64_t sector_num, return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0); } -int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov) -{ - return bdrv_rwv_co(bs, sector_num, qiov, true, 0); -} - int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num, int nb_sectors, BdrvRequestFlags flags) { @@ -2440,117 +2715,53 @@ int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags) } } -int bdrv_pread(BlockDriverState *bs, int64_t offset, - void *buf, int count1) +int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes) { - uint8_t tmp_buf[BDRV_SECTOR_SIZE]; - int len, nb_sectors, count; - int64_t sector_num; + QEMUIOVector qiov; + struct iovec iov = { + .iov_base = (void *)buf, + .iov_len = bytes, + }; int ret; - count = count1; - /* first read to align to sector start */ - len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1); - if (len > count) - len = count; - sector_num = offset >> BDRV_SECTOR_BITS; - if (len > 0) { - if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0) - return ret; - memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len); - count -= len; - if (count == 0) - return count1; - sector_num++; - buf += len; - } - - /* read the sectors "in place" */ - nb_sectors = count >> BDRV_SECTOR_BITS; - if (nb_sectors > 0) { - if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0) - return ret; - sector_num += nb_sectors; - len = nb_sectors << BDRV_SECTOR_BITS; - buf += len; - count -= len; + if (bytes < 0) { + return -EINVAL; } - /* add data from the last sector */ - if (count > 0) { - if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0) - return ret; - memcpy(buf, tmp_buf, count); + qemu_iovec_init_external(&qiov, &iov, 1); + ret = bdrv_prwv_co(bs, offset, &qiov, false, 0); + if (ret < 0) { + return ret; } - return count1; + + return bytes; } int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov) { - uint8_t tmp_buf[BDRV_SECTOR_SIZE]; - int len, nb_sectors, count; - int64_t sector_num; int ret; - count = qiov->size; - - /* first write to align to sector start */ - len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1); - if (len > count) - len = count; - sector_num = offset >> BDRV_SECTOR_BITS; - if (len > 0) { - if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0) - return ret; - qemu_iovec_to_buf(qiov, 0, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), - len); - if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0) - return ret; - count -= len; - if (count == 0) - return qiov->size; - sector_num++; - } - - /* write the sectors "in place" */ - nb_sectors = count >> BDRV_SECTOR_BITS; - if (nb_sectors > 0) { - QEMUIOVector qiov_inplace; - - qemu_iovec_init(&qiov_inplace, qiov->niov); - qemu_iovec_concat(&qiov_inplace, qiov, len, - nb_sectors << BDRV_SECTOR_BITS); - ret = bdrv_writev(bs, sector_num, &qiov_inplace); - qemu_iovec_destroy(&qiov_inplace); - if (ret < 0) { - return ret; - } - - sector_num += nb_sectors; - len = nb_sectors << BDRV_SECTOR_BITS; - count -= len; + ret = bdrv_prwv_co(bs, offset, qiov, true, 0); + if (ret < 0) { + return ret; } - /* add data from the last sector */ - if (count > 0) { - if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0) - return ret; - qemu_iovec_to_buf(qiov, qiov->size - count, tmp_buf, count); - if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0) - return ret; - } return qiov->size; } int bdrv_pwrite(BlockDriverState *bs, int64_t offset, - const void *buf, int count1) + const void *buf, int bytes) { QEMUIOVector qiov; struct iovec iov = { .iov_base = (void *) buf, - .iov_len = count1, + .iov_len = bytes, }; + if (bytes < 0) { + return -EINVAL; + } + qemu_iovec_init_external(&qiov, &iov, 1); return bdrv_pwritev(bs, offset, &qiov); } @@ -2646,40 +2857,34 @@ err: } /* - * Handle a read request in coroutine context + * Forwards an already correctly aligned request to the BlockDriver. This + * handles copy on read and zeroing after EOF; any other features must be + * implemented by the caller. */ -static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, - BdrvRequestFlags flags) +static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, + BdrvTrackedRequest *req, int64_t offset, unsigned int bytes, + int64_t align, QEMUIOVector *qiov, int flags) { BlockDriver *drv = bs->drv; - BdrvTrackedRequest req; int ret; - if (!drv) { - return -ENOMEDIUM; - } - if (bdrv_check_request(bs, sector_num, nb_sectors)) { - return -EIO; - } + int64_t sector_num = offset >> BDRV_SECTOR_BITS; + unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS; - if (bs->copy_on_read) { - flags |= BDRV_REQ_COPY_ON_READ; - } - if (flags & BDRV_REQ_COPY_ON_READ) { - bs->copy_on_read_in_flight++; - } + assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); + assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); - if (bs->copy_on_read_in_flight) { - wait_for_overlapping_requests(bs, sector_num, nb_sectors); - } - - /* throttling disk I/O */ - if (bs->io_limits_enabled) { - bdrv_io_limits_intercept(bs, nb_sectors, false); + /* Handle Copy on Read and associated serialisation */ + if (flags & BDRV_REQ_COPY_ON_READ) { + /* If we touch the same cluster it counts as an overlap. This + * guarantees that allocating writes will be serialized and not race + * with each other for the same cluster. For example, in copy-on-read + * it ensures that the CoR read and write operations are atomic and + * guest writes cannot interleave between them. */ + mark_request_serialising(req, bdrv_get_cluster_size(bs)); } - tracked_request_begin(&req, bs, sector_num, nb_sectors, false); + wait_serialising_requests(req); if (flags & BDRV_REQ_COPY_ON_READ) { int pnum; @@ -2695,6 +2900,7 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, } } + /* Forward the request to the BlockDriver */ if (!(bs->zero_beyond_eof && bs->growable)) { ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov); } else { @@ -2708,7 +2914,8 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, } total_sectors = DIV_ROUND_UP(len, BDRV_SECTOR_SIZE); - max_nb_sectors = MAX(0, total_sectors - sector_num); + max_nb_sectors = MAX(0, ROUND_UP(total_sectors - sector_num, + align >> BDRV_SECTOR_BITS)); if (max_nb_sectors > 0) { ret = drv->bdrv_co_readv(bs, sector_num, MIN(nb_sectors, max_nb_sectors), qiov); @@ -2726,15 +2933,95 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, } out: + return ret; +} + +/* + * Handle a read request in coroutine context + */ +static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs, + int64_t offset, unsigned int bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) +{ + BlockDriver *drv = bs->drv; + BdrvTrackedRequest req; + + /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */ + uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment); + uint8_t *head_buf = NULL; + uint8_t *tail_buf = NULL; + QEMUIOVector local_qiov; + bool use_local_qiov = false; + int ret; + + if (!drv) { + return -ENOMEDIUM; + } + if (bdrv_check_byte_request(bs, offset, bytes)) { + return -EIO; + } + + if (bs->copy_on_read) { + flags |= BDRV_REQ_COPY_ON_READ; + } + + /* throttling disk I/O */ + if (bs->io_limits_enabled) { + bdrv_io_limits_intercept(bs, bytes, false); + } + + /* Align read if necessary by padding qiov */ + if (offset & (align - 1)) { + head_buf = qemu_blockalign(bs, align); + qemu_iovec_init(&local_qiov, qiov->niov + 2); + qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1)); + qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); + use_local_qiov = true; + + bytes += offset & (align - 1); + offset = offset & ~(align - 1); + } + + if ((offset + bytes) & (align - 1)) { + if (!use_local_qiov) { + qemu_iovec_init(&local_qiov, qiov->niov + 1); + qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); + use_local_qiov = true; + } + tail_buf = qemu_blockalign(bs, align); + qemu_iovec_add(&local_qiov, tail_buf, + align - ((offset + bytes) & (align - 1))); + + bytes = ROUND_UP(bytes, align); + } + + tracked_request_begin(&req, bs, offset, bytes, false); + ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align, + use_local_qiov ? &local_qiov : qiov, + flags); tracked_request_end(&req); - if (flags & BDRV_REQ_COPY_ON_READ) { - bs->copy_on_read_in_flight--; + if (use_local_qiov) { + qemu_iovec_destroy(&local_qiov); + qemu_vfree(head_buf); + qemu_vfree(tail_buf); } return ret; } +static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, + BdrvRequestFlags flags) +{ + if (nb_sectors < 0 || nb_sectors > (UINT_MAX >> BDRV_SECTOR_BITS)) { + return -EINVAL; + } + + return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS, + nb_sectors << BDRV_SECTOR_BITS, qiov, flags); +} + int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { @@ -2828,46 +3115,37 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, } /* - * Handle a write request in coroutine context + * Forwards an already correctly aligned write request to the BlockDriver. */ -static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, - BdrvRequestFlags flags) +static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, + BdrvTrackedRequest *req, int64_t offset, unsigned int bytes, + QEMUIOVector *qiov, int flags) { BlockDriver *drv = bs->drv; - BdrvTrackedRequest req; + bool waited; int ret; - if (!bs->drv) { - return -ENOMEDIUM; - } - if (bs->read_only) { - return -EACCES; - } - if (bdrv_check_request(bs, sector_num, nb_sectors)) { - return -EIO; - } + int64_t sector_num = offset >> BDRV_SECTOR_BITS; + unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS; - if (bs->copy_on_read_in_flight) { - wait_for_overlapping_requests(bs, sector_num, nb_sectors); - } - - /* throttling disk I/O */ - if (bs->io_limits_enabled) { - bdrv_io_limits_intercept(bs, nb_sectors, true); - } + assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); + assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); - tracked_request_begin(&req, bs, sector_num, nb_sectors, true); + waited = wait_serialising_requests(req); + assert(!waited || !req->serialising); - ret = notifier_with_return_list_notify(&bs->before_write_notifiers, &req); + ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req); if (ret < 0) { /* Do nothing, write notifier decided to fail this request */ } else if (flags & BDRV_REQ_ZERO_WRITE) { + BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO); ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags); } else { + BLKDBG_EVENT(bs, BLKDBG_PWRITEV); ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov); } + BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE); if (ret == 0 && !bs->enable_write_cache) { ret = bdrv_co_flush(bs); @@ -2882,11 +3160,143 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors); } + return ret; +} + +/* + * Handle a write request in coroutine context + */ +static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs, + int64_t offset, unsigned int bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) +{ + BdrvTrackedRequest req; + /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */ + uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment); + uint8_t *head_buf = NULL; + uint8_t *tail_buf = NULL; + QEMUIOVector local_qiov; + bool use_local_qiov = false; + int ret; + + if (!bs->drv) { + return -ENOMEDIUM; + } + if (bs->read_only) { + return -EACCES; + } + if (bdrv_check_byte_request(bs, offset, bytes)) { + return -EIO; + } + + /* throttling disk I/O */ + if (bs->io_limits_enabled) { + bdrv_io_limits_intercept(bs, bytes, true); + } + + /* + * Align write if necessary by performing a read-modify-write cycle. + * Pad qiov with the read parts and be sure to have a tracked request not + * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle. + */ + tracked_request_begin(&req, bs, offset, bytes, true); + + if (offset & (align - 1)) { + QEMUIOVector head_qiov; + struct iovec head_iov; + + mark_request_serialising(&req, align); + wait_serialising_requests(&req); + + head_buf = qemu_blockalign(bs, align); + head_iov = (struct iovec) { + .iov_base = head_buf, + .iov_len = align, + }; + qemu_iovec_init_external(&head_qiov, &head_iov, 1); + + BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD); + ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align, + align, &head_qiov, 0); + if (ret < 0) { + goto fail; + } + BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD); + + qemu_iovec_init(&local_qiov, qiov->niov + 2); + qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1)); + qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); + use_local_qiov = true; + + bytes += offset & (align - 1); + offset = offset & ~(align - 1); + } + + if ((offset + bytes) & (align - 1)) { + QEMUIOVector tail_qiov; + struct iovec tail_iov; + size_t tail_bytes; + bool waited; + + mark_request_serialising(&req, align); + waited = wait_serialising_requests(&req); + assert(!waited || !use_local_qiov); + + tail_buf = qemu_blockalign(bs, align); + tail_iov = (struct iovec) { + .iov_base = tail_buf, + .iov_len = align, + }; + qemu_iovec_init_external(&tail_qiov, &tail_iov, 1); + + BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL); + ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align, + align, &tail_qiov, 0); + if (ret < 0) { + goto fail; + } + BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); + + if (!use_local_qiov) { + qemu_iovec_init(&local_qiov, qiov->niov + 1); + qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); + use_local_qiov = true; + } + + tail_bytes = (offset + bytes) & (align - 1); + qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes); + + bytes = ROUND_UP(bytes, align); + } + + ret = bdrv_aligned_pwritev(bs, &req, offset, bytes, + use_local_qiov ? &local_qiov : qiov, + flags); + +fail: tracked_request_end(&req); + if (use_local_qiov) { + qemu_iovec_destroy(&local_qiov); + qemu_vfree(head_buf); + qemu_vfree(tail_buf); + } + return ret; } +static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, + BdrvRequestFlags flags) +{ + if (nb_sectors < 0 || nb_sectors > (INT_MAX >> BDRV_SECTOR_BITS)) { + return -EINVAL; + } + + return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS, + nb_sectors << BDRV_SECTOR_BITS, qiov, flags); +} + int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { @@ -3110,11 +3520,12 @@ void bdrv_iterate_format(void (*it)(void *opaque, const char *name), } } +/* This function is to find block backend bs */ BlockDriverState *bdrv_find(const char *name) { BlockDriverState *bs; - QTAILQ_FOREACH(bs, &bdrv_states, list) { + QTAILQ_FOREACH(bs, &bdrv_states, device_list) { if (!strcmp(name, bs->device_name)) { return bs; } @@ -3122,19 +3533,83 @@ BlockDriverState *bdrv_find(const char *name) return NULL; } +/* This function is to find a node in the bs graph */ +BlockDriverState *bdrv_find_node(const char *node_name) +{ + BlockDriverState *bs; + + assert(node_name); + + QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { + if (!strcmp(node_name, bs->node_name)) { + return bs; + } + } + return NULL; +} + +/* Put this QMP function here so it can access the static graph_bdrv_states. */ +BlockDeviceInfoList *bdrv_named_nodes_list(void) +{ + BlockDeviceInfoList *list, *entry; + BlockDriverState *bs; + + list = NULL; + QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) { + entry = g_malloc0(sizeof(*entry)); + entry->value = bdrv_block_device_info(bs); + entry->next = list; + list = entry; + } + + return list; +} + +BlockDriverState *bdrv_lookup_bs(const char *device, + const char *node_name, + Error **errp) +{ + BlockDriverState *bs = NULL; + + if ((!device && !node_name) || (device && node_name)) { + error_setg(errp, "Use either device or node-name but not both"); + return NULL; + } + + if (device) { + bs = bdrv_find(device); + + if (!bs) { + error_set(errp, QERR_DEVICE_NOT_FOUND, device); + return NULL; + } + + return bs; + } + + bs = bdrv_find_node(node_name); + + if (!bs) { + error_set(errp, QERR_DEVICE_NOT_FOUND, node_name); + return NULL; + } + + return bs; +} + BlockDriverState *bdrv_next(BlockDriverState *bs) { if (!bs) { return QTAILQ_FIRST(&bdrv_states); } - return QTAILQ_NEXT(bs, list); + return QTAILQ_NEXT(bs, device_list); } void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque) { BlockDriverState *bs; - QTAILQ_FOREACH(bs, &bdrv_states, list) { + QTAILQ_FOREACH(bs, &bdrv_states, device_list) { it(opaque, bs); } } @@ -3154,7 +3629,7 @@ int bdrv_flush_all(void) BlockDriverState *bs; int result = 0; - QTAILQ_FOREACH(bs, &bdrv_states, list) { + QTAILQ_FOREACH(bs, &bdrv_states, device_list) { int ret = bdrv_flush(bs); if (ret < 0 && !result) { result = ret; @@ -4278,7 +4753,7 @@ void bdrv_invalidate_cache_all(void) { BlockDriverState *bs; - QTAILQ_FOREACH(bs, &bdrv_states, list) { + QTAILQ_FOREACH(bs, &bdrv_states, device_list) { bdrv_invalidate_cache(bs); } } @@ -4287,7 +4762,7 @@ void bdrv_clear_incoming_migration_all(void) { BlockDriverState *bs; - QTAILQ_FOREACH(bs, &bdrv_states, list) { + QTAILQ_FOREACH(bs, &bdrv_states, device_list) { bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING); } } @@ -4314,9 +4789,15 @@ int bdrv_flush(BlockDriverState *bs) return rwco.ret; } +typedef struct DiscardCo { + BlockDriverState *bs; + int64_t sector_num; + int nb_sectors; + int ret; +} DiscardCo; static void coroutine_fn bdrv_discard_co_entry(void *opaque) { - RwCo *rwco = opaque; + DiscardCo *rwco = opaque; rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors); } @@ -4400,7 +4881,7 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) { Coroutine *co; - RwCo rwco = { + DiscardCo rwco = { .bs = bs, .sector_num = sector_num, .nb_sectors = nb_sectors, @@ -4505,14 +4986,14 @@ BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs, return NULL; } -void bdrv_set_buffer_alignment(BlockDriverState *bs, int align) +void bdrv_set_guest_block_size(BlockDriverState *bs, int align) { - bs->buffer_alignment = align; + bs->guest_block_size = align; } void *qemu_blockalign(BlockDriverState *bs, size_t size) { - return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size); + return qemu_memalign(bdrv_opt_mem_align(bs), size); } /* @@ -4521,9 +5002,13 @@ void *qemu_blockalign(BlockDriverState *bs, size_t size) bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) { int i; + size_t alignment = bdrv_opt_mem_align(bs); for (i = 0; i < qiov->niov; i++) { - if ((uintptr_t) qiov->iov[i].iov_base % bs->buffer_alignment) { + if ((uintptr_t) qiov->iov[i].iov_base % alignment) { + return false; + } + if (qiov->iov[i].iov_len % alignment) { return false; } } @@ -4875,21 +5360,68 @@ int bdrv_amend_options(BlockDriverState *bs, QEMUOptionParameter *options) return bs->drv->bdrv_amend_options(bs, options); } -ExtSnapshotPerm bdrv_check_ext_snapshot(BlockDriverState *bs) +/* Used to recurse on single child block filters. + * Single child block filter will store their child in bs->file. + */ +bool bdrv_generic_is_first_non_filter(BlockDriverState *bs, + BlockDriverState *candidate) { - if (bs->drv->bdrv_check_ext_snapshot) { - return bs->drv->bdrv_check_ext_snapshot(bs); + if (!bs->drv) { + return false; } - if (bs->file && bs->file->drv && bs->file->drv->bdrv_check_ext_snapshot) { - return bs->file->drv->bdrv_check_ext_snapshot(bs); + if (!bs->drv->authorizations[BS_IS_A_FILTER]) { + if (bs == candidate) { + return true; + } else { + return false; + } + } + + if (!bs->drv->authorizations[BS_FILTER_PASS_DOWN]) { + return false; + } + + if (!bs->file) { + return false; } - /* external snapshots are allowed by default */ - return EXT_SNAPSHOT_ALLOWED; + return bdrv_recurse_is_first_non_filter(bs->file, candidate); } -ExtSnapshotPerm bdrv_check_ext_snapshot_forbidden(BlockDriverState *bs) +bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs, + BlockDriverState *candidate) +{ + if (bs->drv && bs->drv->bdrv_recurse_is_first_non_filter) { + return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate); + } + + return bdrv_generic_is_first_non_filter(bs, candidate); +} + +/* This function checks if the candidate is the first non filter bs down it's + * bs chain. Since we don't have pointers to parents it explore all bs chains + * from the top. Some filters can choose not to pass down the recursion. + */ +bool bdrv_is_first_non_filter(BlockDriverState *candidate) { - return EXT_SNAPSHOT_FORBIDDEN; + BlockDriverState *bs; + + /* walk down the bs forest recursively */ + QTAILQ_FOREACH(bs, &bdrv_states, device_list) { + bool perm; + + if (!bs->file) { + continue; + } + + perm = bdrv_recurse_is_first_non_filter(bs->file, candidate); + + /* candidate is the first non filter */ + if (perm) { + return true; + } + } + + return false; } diff --git a/block/backup.c b/block/backup.c index 0198514..15a2e55 100644 --- a/block/backup.c +++ b/block/backup.c @@ -181,8 +181,13 @@ static int coroutine_fn backup_before_write_notify( void *opaque) { BdrvTrackedRequest *req = opaque; + int64_t sector_num = req->offset >> BDRV_SECTOR_BITS; + int nb_sectors = req->bytes >> BDRV_SECTOR_BITS; - return backup_do_cow(req->bs, req->sector_num, req->nb_sectors, NULL); + assert((req->offset & (BDRV_SECTOR_SIZE - 1)) == 0); + assert((req->bytes & (BDRV_SECTOR_SIZE - 1)) == 0); + + return backup_do_cow(req->bs, sector_num, nb_sectors, NULL); } static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp) diff --git a/block/blkdebug.c b/block/blkdebug.c index ebc5f13..56c4cd0 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -186,6 +186,14 @@ static const char *event_names[BLKDBG_EVENT_MAX] = { [BLKDBG_FLUSH_TO_OS] = "flush_to_os", [BLKDBG_FLUSH_TO_DISK] = "flush_to_disk", + + [BLKDBG_PWRITEV_RMW_HEAD] = "pwritev_rmw.head", + [BLKDBG_PWRITEV_RMW_AFTER_HEAD] = "pwritev_rmw.after_head", + [BLKDBG_PWRITEV_RMW_TAIL] = "pwritev_rmw.tail", + [BLKDBG_PWRITEV_RMW_AFTER_TAIL] = "pwritev_rmw.after_tail", + [BLKDBG_PWRITEV] = "pwritev", + [BLKDBG_PWRITEV_ZERO] = "pwritev_zero", + [BLKDBG_PWRITEV_DONE] = "pwritev_done", }; static int get_event_by_name(const char *name, BlkDebugEvent *event) @@ -271,19 +279,33 @@ static void remove_rule(BlkdebugRule *rule) g_free(rule); } -static int read_config(BDRVBlkdebugState *s, const char *filename) +static int read_config(BDRVBlkdebugState *s, const char *filename, + QDict *options, Error **errp) { - FILE *f; + FILE *f = NULL; int ret; struct add_rule_data d; + Error *local_err = NULL; + + if (filename) { + f = fopen(filename, "r"); + if (f == NULL) { + error_setg_errno(errp, errno, "Could not read blkdebug config file"); + return -errno; + } - f = fopen(filename, "r"); - if (f == NULL) { - return -errno; + ret = qemu_config_parse(f, config_groups, filename); + if (ret < 0) { + error_setg(errp, "Could not parse blkdebug config file"); + ret = -EINVAL; + goto fail; + } } - ret = qemu_config_parse(f, config_groups, filename); - if (ret < 0) { + qemu_config_parse_qdict(options, config_groups, &local_err); + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); + ret = -EINVAL; goto fail; } @@ -298,7 +320,9 @@ static int read_config(BDRVBlkdebugState *s, const char *filename) fail: qemu_opts_reset(&inject_error_opts); qemu_opts_reset(&set_state_opts); - fclose(f); + if (f) { + fclose(f); + } return ret; } @@ -310,7 +334,9 @@ static void blkdebug_parse_filename(const char *filename, QDict *options, /* Parse the blkdebug: prefix */ if (!strstart(filename, "blkdebug:", &filename)) { - error_setg(errp, "File name string must start with 'blkdebug:'"); + /* There was no prefix; therefore, all options have to be already + present in the QDict (except for the filename) */ + qdict_put(options, "x-image", qstring_from_str(filename)); return; } @@ -346,6 +372,11 @@ static QemuOptsList runtime_opts = { .type = QEMU_OPT_STRING, .help = "[internal use only, will be removed]", }, + { + .name = "align", + .type = QEMU_OPT_SIZE, + .help = "Required alignment in bytes", + }, { /* end of list */ } }, }; @@ -356,7 +387,8 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags, BDRVBlkdebugState *s = bs->opaque; QemuOpts *opts; Error *local_err = NULL; - const char *filename, *config; + const char *config; + uint64_t align; int ret; opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); @@ -367,30 +399,31 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } - /* Read rules from config file */ + /* Read rules from config file or command line options */ config = qemu_opt_get(opts, "config"); - if (config) { - ret = read_config(s, config); - if (ret < 0) { - error_setg_errno(errp, -ret, "Could not read blkdebug config file"); - goto fail; - } + ret = read_config(s, config, options, errp); + if (ret) { + goto fail; } /* Set initial state */ s->state = 1; /* Open the backing file */ - filename = qemu_opt_get(opts, "x-image"); - if (filename == NULL) { - error_setg(errp, "Could not retrieve image file name"); - ret = -EINVAL; + ret = bdrv_open_image(&bs->file, qemu_opt_get(opts, "x-image"), options, "image", + flags, true, false, &local_err); + if (ret < 0) { + error_propagate(errp, local_err); goto fail; } - ret = bdrv_file_open(&bs->file, filename, NULL, flags, &local_err); - if (ret < 0) { - error_propagate(errp, local_err); + /* Set request alignment */ + align = qemu_opt_get_size(opts, "align", bs->request_alignment); + if (align > 0 && align < INT_MAX && !(align & (align - 1))) { + bs->request_alignment = align; + } else { + error_setg(errp, "Invalid alignment"); + ret = -EINVAL; goto fail; } diff --git a/block/blkverify.c b/block/blkverify.c index 1c1637f..cfcbcf4 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -78,7 +78,9 @@ static void blkverify_parse_filename(const char *filename, QDict *options, /* Parse the blkverify: prefix */ if (!strstart(filename, "blkverify:", &filename)) { - error_setg(errp, "File name string must start with 'blkverify:'"); + /* There was no prefix; therefore, all options have to be already + present in the QDict (except for the filename) */ + qdict_put(options, "x-image", qstring_from_str(filename)); return; } @@ -122,7 +124,6 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags, BDRVBlkverifyState *s = bs->opaque; QemuOpts *opts; Error *local_err = NULL; - const char *filename, *raw; int ret; opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); @@ -133,33 +134,19 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } - /* Parse the raw image filename */ - raw = qemu_opt_get(opts, "x-raw"); - if (raw == NULL) { - error_setg(errp, "Could not retrieve raw image filename"); - ret = -EINVAL; - goto fail; - } - - ret = bdrv_file_open(&bs->file, raw, NULL, flags, &local_err); + /* Open the raw file */ + ret = bdrv_open_image(&bs->file, qemu_opt_get(opts, "x-raw"), options, + "raw", flags, true, false, &local_err); if (ret < 0) { error_propagate(errp, local_err); goto fail; } /* Open the test file */ - filename = qemu_opt_get(opts, "x-image"); - if (filename == NULL) { - error_setg(errp, "Could not retrieve test image filename"); - ret = -EINVAL; - goto fail; - } - - s->test_file = bdrv_new(""); - ret = bdrv_open(s->test_file, filename, NULL, flags, NULL, &local_err); + ret = bdrv_open_image(&s->test_file, qemu_opt_get(opts, "x-image"), options, + "test", flags, false, false, &local_err); if (ret < 0) { error_propagate(errp, local_err); - bdrv_unref(s->test_file); s->test_file = NULL; goto fail; } @@ -417,7 +404,7 @@ static BlockDriver bdrv_blkverify = { .bdrv_aio_writev = blkverify_aio_writev, .bdrv_aio_flush = blkverify_aio_flush, - .bdrv_check_ext_snapshot = bdrv_check_ext_snapshot_forbidden, + .authorizations = { true, false }, }; static void bdrv_blkverify_init(void) diff --git a/block/cow.c b/block/cow.c index dc15e46..7fc0b12 100644 --- a/block/cow.c +++ b/block/cow.c @@ -351,7 +351,8 @@ static int cow_create(const char *filename, QEMUOptionParameter *options, return ret; } - ret = bdrv_file_open(&cow_bs, filename, NULL, BDRV_O_RDWR, &local_err); + ret = bdrv_file_open(&cow_bs, filename, NULL, NULL, BDRV_O_RDWR, + &local_err); if (ret < 0) { qerror_report_err(local_err); error_free(local_err); diff --git a/block/curl.c b/block/curl.c index a603936..a807584 100644 --- a/block/curl.c +++ b/block/curl.c @@ -34,6 +34,11 @@ #define DPRINTF(fmt, ...) do { } while (0) #endif +#if LIBCURL_VERSION_NUM >= 0x071000 +/* The multi interface timer callback was introduced in 7.16.0 */ +#define NEED_CURL_TIMER_CALLBACK +#endif + #define PROTOCOLS (CURLPROTO_HTTP | CURLPROTO_HTTPS | \ CURLPROTO_FTP | CURLPROTO_FTPS | \ CURLPROTO_TFTP) @@ -77,6 +82,7 @@ typedef struct CURLState typedef struct BDRVCURLState { CURLM *multi; + QEMUTimer timer; size_t len; CURLState states[CURL_NUM_STATES]; char *url; @@ -87,6 +93,23 @@ typedef struct BDRVCURLState { static void curl_clean_state(CURLState *s); static void curl_multi_do(void *arg); +#ifdef NEED_CURL_TIMER_CALLBACK +static int curl_timer_cb(CURLM *multi, long timeout_ms, void *opaque) +{ + BDRVCURLState *s = opaque; + + DPRINTF("CURL: timer callback timeout_ms %ld\n", timeout_ms); + if (timeout_ms == -1) { + timer_del(&s->timer); + } else { + int64_t timeout_ns = (int64_t)timeout_ms * 1000 * 1000; + timer_mod(&s->timer, + qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + timeout_ns); + } + return 0; +} +#endif + static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action, void *s, void *sp) { @@ -209,20 +232,10 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len, return FIND_RET_NONE; } -static void curl_multi_do(void *arg) +static void curl_multi_read(BDRVCURLState *s) { - BDRVCURLState *s = (BDRVCURLState *)arg; - int running; - int r; int msgs_in_queue; - if (!s->multi) - return; - - do { - r = curl_multi_socket_all(s->multi, &running); - } while(r == CURLM_CALL_MULTI_PERFORM); - /* Try to find done transfers, so we can free the easy * handle again. */ do { @@ -266,6 +279,41 @@ static void curl_multi_do(void *arg) } while(msgs_in_queue); } +static void curl_multi_do(void *arg) +{ + BDRVCURLState *s = (BDRVCURLState *)arg; + int running; + int r; + + if (!s->multi) { + return; + } + + do { + r = curl_multi_socket_all(s->multi, &running); + } while(r == CURLM_CALL_MULTI_PERFORM); + + curl_multi_read(s); +} + +static void curl_multi_timeout_do(void *arg) +{ +#ifdef NEED_CURL_TIMER_CALLBACK + BDRVCURLState *s = (BDRVCURLState *)arg; + int running; + + if (!s->multi) { + return; + } + + curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running); + + curl_multi_read(s); +#else + abort(); +#endif +} + static CURLState *curl_init_state(BDRVCURLState *s) { CURLState *state = NULL; @@ -473,12 +521,20 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags, curl_easy_cleanup(state->curl); state->curl = NULL; + aio_timer_init(bdrv_get_aio_context(bs), &s->timer, + QEMU_CLOCK_REALTIME, SCALE_NS, + curl_multi_timeout_do, s); + // Now we know the file exists and its size, so let's // initialize the multi interface! s->multi = curl_multi_init(); curl_multi_setopt(s->multi, CURLMOPT_SOCKETDATA, s); curl_multi_setopt(s->multi, CURLMOPT_SOCKETFUNCTION, curl_sock_cb); +#ifdef NEED_CURL_TIMER_CALLBACK + curl_multi_setopt(s->multi, CURLMOPT_TIMERDATA, s); + curl_multi_setopt(s->multi, CURLMOPT_TIMERFUNCTION, curl_timer_cb); +#endif curl_multi_do(s); qemu_opts_del(opts); @@ -597,6 +653,9 @@ static void curl_close(BlockDriverState *bs) } if (s->multi) curl_multi_cleanup(s->multi); + + timer_del(&s->timer); + g_free(s->url); } diff --git a/block/gluster.c b/block/gluster.c index 563d497..a009b15 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -21,19 +21,15 @@ #include "qemu/uri.h" typedef struct GlusterAIOCB { - BlockDriverAIOCB common; int64_t size; int ret; - bool *finished; QEMUBH *bh; + Coroutine *coroutine; } GlusterAIOCB; typedef struct BDRVGlusterState { struct glfs *glfs; - int fds[2]; struct glfs_fd *fd; - int event_reader_pos; - GlusterAIOCB *event_acb; } BDRVGlusterState; #define GLUSTER_FD_READ 0 @@ -231,46 +227,32 @@ out: return NULL; } -static void qemu_gluster_complete_aio(GlusterAIOCB *acb, BDRVGlusterState *s) +static void qemu_gluster_complete_aio(void *opaque) { - int ret; - bool *finished = acb->finished; - BlockDriverCompletionFunc *cb = acb->common.cb; - void *opaque = acb->common.opaque; - - if (!acb->ret || acb->ret == acb->size) { - ret = 0; /* Success */ - } else if (acb->ret < 0) { - ret = acb->ret; /* Read/Write failed */ - } else { - ret = -EIO; /* Partial read/write - fail it */ - } + GlusterAIOCB *acb = (GlusterAIOCB *)opaque; - qemu_aio_release(acb); - cb(opaque, ret); - if (finished) { - *finished = true; - } + qemu_bh_delete(acb->bh); + acb->bh = NULL; + qemu_coroutine_enter(acb->coroutine, NULL); } -static void qemu_gluster_aio_event_reader(void *opaque) +/* + * AIO callback routine called from GlusterFS thread. + */ +static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg) { - BDRVGlusterState *s = opaque; - ssize_t ret; - - do { - char *p = (char *)&s->event_acb; - - ret = read(s->fds[GLUSTER_FD_READ], p + s->event_reader_pos, - sizeof(s->event_acb) - s->event_reader_pos); - if (ret > 0) { - s->event_reader_pos += ret; - if (s->event_reader_pos == sizeof(s->event_acb)) { - s->event_reader_pos = 0; - qemu_gluster_complete_aio(s->event_acb, s); - } - } - } while (ret < 0 && errno == EINTR); + GlusterAIOCB *acb = (GlusterAIOCB *)arg; + + if (!ret || ret == acb->size) { + acb->ret = 0; /* Success */ + } else if (ret < 0) { + acb->ret = ret; /* Read/Write failed */ + } else { + acb->ret = -EIO; /* Partial read/write - fail it */ + } + + acb->bh = qemu_bh_new(qemu_gluster_complete_aio, acb); + qemu_bh_schedule(acb->bh); } /* TODO Convert to fine grained options */ @@ -309,7 +291,6 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options, filename = qemu_opt_get(opts, "filename"); - s->glfs = qemu_gluster_init(gconf, filename); if (!s->glfs) { ret = -errno; @@ -329,18 +310,8 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options, s->fd = glfs_open(s->glfs, gconf->image, open_flags); if (!s->fd) { ret = -errno; - goto out; } - ret = qemu_pipe(s->fds); - if (ret < 0) { - ret = -errno; - goto out; - } - fcntl(s->fds[GLUSTER_FD_READ], F_SETFL, O_NONBLOCK); - qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], - qemu_gluster_aio_event_reader, NULL, s); - out: qemu_opts_del(opts); qemu_gluster_gconf_free(gconf); @@ -356,12 +327,65 @@ out: return ret; } +#ifdef CONFIG_GLUSTERFS_ZEROFILL +static coroutine_fn int qemu_gluster_co_write_zeroes(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, BdrvRequestFlags flags) +{ + int ret; + GlusterAIOCB *acb = g_slice_new(GlusterAIOCB); + BDRVGlusterState *s = bs->opaque; + off_t size = nb_sectors * BDRV_SECTOR_SIZE; + off_t offset = sector_num * BDRV_SECTOR_SIZE; + + acb->size = size; + acb->ret = 0; + acb->coroutine = qemu_coroutine_self(); + + ret = glfs_zerofill_async(s->fd, offset, size, &gluster_finish_aiocb, acb); + if (ret < 0) { + ret = -errno; + goto out; + } + + qemu_coroutine_yield(); + ret = acb->ret; + +out: + g_slice_free(GlusterAIOCB, acb); + return ret; +} + +static inline bool gluster_supports_zerofill(void) +{ + return 1; +} + +static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset, + int64_t size) +{ + return glfs_zerofill(fd, offset, size); +} + +#else +static inline bool gluster_supports_zerofill(void) +{ + return 0; +} + +static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset, + int64_t size) +{ + return 0; +} +#endif + static int qemu_gluster_create(const char *filename, QEMUOptionParameter *options, Error **errp) { struct glfs *glfs; struct glfs_fd *fd; int ret = 0; + int prealloc = 0; int64_t total_size = 0; GlusterConf *gconf = g_malloc0(sizeof(GlusterConf)); @@ -374,6 +398,19 @@ static int qemu_gluster_create(const char *filename, while (options && options->name) { if (!strcmp(options->name, BLOCK_OPT_SIZE)) { total_size = options->value.n / BDRV_SECTOR_SIZE; + } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) { + if (!options->value.s || !strcmp(options->value.s, "off")) { + prealloc = 0; + } else if (!strcmp(options->value.s, "full") && + gluster_supports_zerofill()) { + prealloc = 1; + } else { + error_setg(errp, "Invalid preallocation mode: '%s'" + " or GlusterFS doesn't support zerofill API", + options->value.s); + ret = -EINVAL; + goto out; + } } options++; } @@ -383,9 +420,15 @@ static int qemu_gluster_create(const char *filename, if (!fd) { ret = -errno; } else { - if (glfs_ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) { + if (!glfs_ftruncate(fd, total_size * BDRV_SECTOR_SIZE)) { + if (prealloc && qemu_gluster_zerofill(fd, 0, + total_size * BDRV_SECTOR_SIZE)) { + ret = -errno; + } + } else { ret = -errno; } + if (glfs_close(fd) != 0) { ret = -errno; } @@ -398,58 +441,18 @@ out: return ret; } -static void qemu_gluster_aio_cancel(BlockDriverAIOCB *blockacb) -{ - GlusterAIOCB *acb = (GlusterAIOCB *)blockacb; - bool finished = false; - - acb->finished = &finished; - while (!finished) { - qemu_aio_wait(); - } -} - -static const AIOCBInfo gluster_aiocb_info = { - .aiocb_size = sizeof(GlusterAIOCB), - .cancel = qemu_gluster_aio_cancel, -}; - -static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg) -{ - GlusterAIOCB *acb = (GlusterAIOCB *)arg; - BlockDriverState *bs = acb->common.bs; - BDRVGlusterState *s = bs->opaque; - int retval; - - acb->ret = ret; - retval = qemu_write_full(s->fds[GLUSTER_FD_WRITE], &acb, sizeof(acb)); - if (retval != sizeof(acb)) { - /* - * Gluster AIO callback thread failed to notify the waiting - * QEMU thread about IO completion. - */ - error_report("Gluster AIO completion failed: %s", strerror(errno)); - abort(); - } -} - -static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockDriverCompletionFunc *cb, void *opaque, int write) +static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int write) { int ret; - GlusterAIOCB *acb; + GlusterAIOCB *acb = g_slice_new(GlusterAIOCB); BDRVGlusterState *s = bs->opaque; - size_t size; - off_t offset; - - offset = sector_num * BDRV_SECTOR_SIZE; - size = nb_sectors * BDRV_SECTOR_SIZE; + size_t size = nb_sectors * BDRV_SECTOR_SIZE; + off_t offset = sector_num * BDRV_SECTOR_SIZE; - acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque); acb->size = size; acb->ret = 0; - acb->finished = NULL; + acb->coroutine = qemu_coroutine_self(); if (write) { ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0, @@ -460,13 +463,16 @@ static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs, } if (ret < 0) { + ret = -errno; goto out; } - return &acb->common; + + qemu_coroutine_yield(); + ret = acb->ret; out: - qemu_aio_release(acb); - return NULL; + g_slice_free(GlusterAIOCB, acb); + return ret; } static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset) @@ -482,71 +488,68 @@ static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset) return 0; } -static BlockDriverAIOCB *qemu_gluster_aio_readv(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockDriverCompletionFunc *cb, void *opaque) +static coroutine_fn int qemu_gluster_co_readv(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { - return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); + return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 0); } -static BlockDriverAIOCB *qemu_gluster_aio_writev(BlockDriverState *bs, - int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, - BlockDriverCompletionFunc *cb, void *opaque) +static coroutine_fn int qemu_gluster_co_writev(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { - return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 1); + return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 1); } -static BlockDriverAIOCB *qemu_gluster_aio_flush(BlockDriverState *bs, - BlockDriverCompletionFunc *cb, void *opaque) +static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs) { int ret; - GlusterAIOCB *acb; + GlusterAIOCB *acb = g_slice_new(GlusterAIOCB); BDRVGlusterState *s = bs->opaque; - acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque); acb->size = 0; acb->ret = 0; - acb->finished = NULL; + acb->coroutine = qemu_coroutine_self(); ret = glfs_fsync_async(s->fd, &gluster_finish_aiocb, acb); if (ret < 0) { + ret = -errno; goto out; } - return &acb->common; + + qemu_coroutine_yield(); + ret = acb->ret; out: - qemu_aio_release(acb); - return NULL; + g_slice_free(GlusterAIOCB, acb); + return ret; } #ifdef CONFIG_GLUSTERFS_DISCARD -static BlockDriverAIOCB *qemu_gluster_aio_discard(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, BlockDriverCompletionFunc *cb, - void *opaque) +static coroutine_fn int qemu_gluster_co_discard(BlockDriverState *bs, + int64_t sector_num, int nb_sectors) { int ret; - GlusterAIOCB *acb; + GlusterAIOCB *acb = g_slice_new(GlusterAIOCB); BDRVGlusterState *s = bs->opaque; - size_t size; - off_t offset; - - offset = sector_num * BDRV_SECTOR_SIZE; - size = nb_sectors * BDRV_SECTOR_SIZE; + size_t size = nb_sectors * BDRV_SECTOR_SIZE; + off_t offset = sector_num * BDRV_SECTOR_SIZE; - acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque); acb->size = 0; acb->ret = 0; - acb->finished = NULL; + acb->coroutine = qemu_coroutine_self(); ret = glfs_discard_async(s->fd, offset, size, &gluster_finish_aiocb, acb); if (ret < 0) { + ret = -errno; goto out; } - return &acb->common; + + qemu_coroutine_yield(); + ret = acb->ret; out: - qemu_aio_release(acb); - return NULL; + g_slice_free(GlusterAIOCB, acb); + return ret; } #endif @@ -581,10 +584,6 @@ static void qemu_gluster_close(BlockDriverState *bs) { BDRVGlusterState *s = bs->opaque; - close(s->fds[GLUSTER_FD_READ]); - close(s->fds[GLUSTER_FD_WRITE]); - qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL); - if (s->fd) { glfs_close(s->fd); s->fd = NULL; @@ -604,6 +603,11 @@ static QEMUOptionParameter qemu_gluster_create_options[] = { .type = OPT_SIZE, .help = "Virtual disk size" }, + { + .name = BLOCK_OPT_PREALLOC, + .type = OPT_STRING, + .help = "Preallocation mode (allowed values: off, full)" + }, { NULL } }; @@ -618,12 +622,15 @@ static BlockDriver bdrv_gluster = { .bdrv_getlength = qemu_gluster_getlength, .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, .bdrv_truncate = qemu_gluster_truncate, - .bdrv_aio_readv = qemu_gluster_aio_readv, - .bdrv_aio_writev = qemu_gluster_aio_writev, - .bdrv_aio_flush = qemu_gluster_aio_flush, + .bdrv_co_readv = qemu_gluster_co_readv, + .bdrv_co_writev = qemu_gluster_co_writev, + .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk, .bdrv_has_zero_init = qemu_gluster_has_zero_init, #ifdef CONFIG_GLUSTERFS_DISCARD - .bdrv_aio_discard = qemu_gluster_aio_discard, + .bdrv_co_discard = qemu_gluster_co_discard, +#endif +#ifdef CONFIG_GLUSTERFS_ZEROFILL + .bdrv_co_write_zeroes = qemu_gluster_co_write_zeroes, #endif .create_options = qemu_gluster_create_options, }; @@ -639,12 +646,15 @@ static BlockDriver bdrv_gluster_tcp = { .bdrv_getlength = qemu_gluster_getlength, .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, .bdrv_truncate = qemu_gluster_truncate, - .bdrv_aio_readv = qemu_gluster_aio_readv, - .bdrv_aio_writev = qemu_gluster_aio_writev, - .bdrv_aio_flush = qemu_gluster_aio_flush, + .bdrv_co_readv = qemu_gluster_co_readv, + .bdrv_co_writev = qemu_gluster_co_writev, + .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk, .bdrv_has_zero_init = qemu_gluster_has_zero_init, #ifdef CONFIG_GLUSTERFS_DISCARD - .bdrv_aio_discard = qemu_gluster_aio_discard, + .bdrv_co_discard = qemu_gluster_co_discard, +#endif +#ifdef CONFIG_GLUSTERFS_ZEROFILL + .bdrv_co_write_zeroes = qemu_gluster_co_write_zeroes, #endif .create_options = qemu_gluster_create_options, }; @@ -660,12 +670,15 @@ static BlockDriver bdrv_gluster_unix = { .bdrv_getlength = qemu_gluster_getlength, .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, .bdrv_truncate = qemu_gluster_truncate, - .bdrv_aio_readv = qemu_gluster_aio_readv, - .bdrv_aio_writev = qemu_gluster_aio_writev, - .bdrv_aio_flush = qemu_gluster_aio_flush, + .bdrv_co_readv = qemu_gluster_co_readv, + .bdrv_co_writev = qemu_gluster_co_writev, + .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk, .bdrv_has_zero_init = qemu_gluster_has_zero_init, #ifdef CONFIG_GLUSTERFS_DISCARD - .bdrv_aio_discard = qemu_gluster_aio_discard, + .bdrv_co_discard = qemu_gluster_co_discard, +#endif +#ifdef CONFIG_GLUSTERFS_ZEROFILL + .bdrv_co_write_zeroes = qemu_gluster_co_write_zeroes, #endif .create_options = qemu_gluster_create_options, }; @@ -681,12 +694,15 @@ static BlockDriver bdrv_gluster_rdma = { .bdrv_getlength = qemu_gluster_getlength, .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, .bdrv_truncate = qemu_gluster_truncate, - .bdrv_aio_readv = qemu_gluster_aio_readv, - .bdrv_aio_writev = qemu_gluster_aio_writev, - .bdrv_aio_flush = qemu_gluster_aio_flush, + .bdrv_co_readv = qemu_gluster_co_readv, + .bdrv_co_writev = qemu_gluster_co_writev, + .bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk, .bdrv_has_zero_init = qemu_gluster_has_zero_init, #ifdef CONFIG_GLUSTERFS_DISCARD - .bdrv_aio_discard = qemu_gluster_aio_discard, + .bdrv_co_discard = qemu_gluster_co_discard, +#endif +#ifdef CONFIG_GLUSTERFS_ZEROFILL + .bdrv_co_write_zeroes = qemu_gluster_co_write_zeroes, #endif .create_options = qemu_gluster_create_options, }; diff --git a/block/iscsi.c b/block/iscsi.c index c0ea0c4..890bd81 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -308,7 +308,7 @@ retry: iscsi_co_generic_cb, &iTask); if (iTask.task == NULL) { g_free(buf); - return -EIO; + return -ENOMEM; } #if defined(LIBISCSI_FEATURE_IOVECTOR) scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov, @@ -376,7 +376,7 @@ retry: break; } if (iTask.task == NULL) { - return -EIO; + return -ENOMEM; } #if defined(LIBISCSI_FEATURE_IOVECTOR) scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov); @@ -419,7 +419,7 @@ static int coroutine_fn iscsi_co_flush(BlockDriverState *bs) retry: if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0, 0, iscsi_co_generic_cb, &iTask) == NULL) { - return -EIO; + return -ENOMEM; } while (!iTask.complete) { @@ -669,7 +669,7 @@ retry: sector_qemu2lun(sector_num, iscsilun), 8 + 16, iscsi_co_generic_cb, &iTask) == NULL) { - ret = -EIO; + ret = -ENOMEM; goto out; } @@ -753,7 +753,7 @@ coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t sector_num, retry: if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1, iscsi_co_generic_cb, &iTask) == NULL) { - return -EIO; + return -ENOMEM; } while (!iTask.complete) { @@ -822,7 +822,7 @@ retry: iscsilun->zeroblock, iscsilun->block_size, nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP), 0, 0, iscsi_co_generic_cb, &iTask) == NULL) { - return -EIO; + return -ENOMEM; } while (!iTask.complete) { @@ -1217,6 +1217,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags, goto out; } bs->total_sectors = sector_lun2qemu(iscsilun->num_blocks, iscsilun); + bs->request_alignment = iscsilun->block_size; /* Medium changer or tape. We dont have any emulation for this so this must * be sg ioctl compatible. We force it to be sg, otherwise qemu will try @@ -1265,23 +1266,6 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags, sizeof(struct scsi_inquiry_block_limits)); scsi_free_scsi_task(task); task = NULL; - - if (iscsilun->bl.max_unmap < 0xffffffff) { - bs->bl.max_discard = sector_lun2qemu(iscsilun->bl.max_unmap, - iscsilun); - } - bs->bl.discard_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran, - iscsilun); - - if (iscsilun->bl.max_ws_len < 0xffffffff) { - bs->bl.max_write_zeroes = sector_lun2qemu(iscsilun->bl.max_ws_len, - iscsilun); - } - bs->bl.write_zeroes_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran, - iscsilun); - - bs->bl.opt_transfer_length = sector_lun2qemu(iscsilun->bl.opt_xfer_len, - iscsilun); } #if defined(LIBISCSI_FEATURE_NOP_COUNTER) @@ -1326,6 +1310,34 @@ static void iscsi_close(BlockDriverState *bs) memset(iscsilun, 0, sizeof(IscsiLun)); } +static int iscsi_refresh_limits(BlockDriverState *bs) +{ + IscsiLun *iscsilun = bs->opaque; + + /* We don't actually refresh here, but just return data queried in + * iscsi_open(): iscsi targets don't change their limits. */ + if (iscsilun->lbp.lbpu || iscsilun->lbp.lbpws) { + if (iscsilun->bl.max_unmap < 0xffffffff) { + bs->bl.max_discard = sector_lun2qemu(iscsilun->bl.max_unmap, + iscsilun); + } + bs->bl.discard_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran, + iscsilun); + + if (iscsilun->bl.max_ws_len < 0xffffffff) { + bs->bl.max_write_zeroes = sector_lun2qemu(iscsilun->bl.max_ws_len, + iscsilun); + } + bs->bl.write_zeroes_alignment = sector_lun2qemu(iscsilun->bl.opt_unmap_gran, + iscsilun); + + bs->bl.opt_transfer_length = sector_lun2qemu(iscsilun->bl.opt_xfer_len, + iscsilun); + } + + return 0; +} + static int iscsi_truncate(BlockDriverState *bs, int64_t offset) { IscsiLun *iscsilun = bs->opaque; @@ -1438,6 +1450,7 @@ static BlockDriver bdrv_iscsi = { .bdrv_getlength = iscsi_getlength, .bdrv_get_info = iscsi_get_info, .bdrv_truncate = iscsi_truncate, + .bdrv_refresh_limits = iscsi_refresh_limits, #if defined(LIBISCSI_FEATURE_IOVECTOR) .bdrv_co_get_block_status = iscsi_co_get_block_status, diff --git a/block/mirror.c b/block/mirror.c index 2932bab..2a43334 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -96,6 +96,7 @@ static void mirror_iteration_done(MirrorOp *op, int ret) bitmap_set(s->cow_bitmap, chunk_num, nb_chunks); } + qemu_iovec_destroy(&op->qiov); g_slice_free(MirrorOp, op); qemu_coroutine_enter(s->common.co, NULL); } @@ -630,11 +631,49 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base, BlockDriverCompletionFunc *cb, void *opaque, Error **errp) { + int64_t length, base_length; + int orig_base_flags; + + orig_base_flags = bdrv_get_flags(base); + if (bdrv_reopen(base, bs->open_flags, errp)) { return; } + + length = bdrv_getlength(bs); + if (length < 0) { + error_setg(errp, "Unable to determine length of %s", bs->filename); + goto error_restore_flags; + } + + base_length = bdrv_getlength(base); + if (base_length < 0) { + error_setg(errp, "Unable to determine length of %s", base->filename); + goto error_restore_flags; + } + + if (length > base_length) { + if (bdrv_truncate(base, length) < 0) { + error_setg(errp, "Top image %s is larger than base image %s, and " + "resize of base image failed", + bs->filename, base->filename); + goto error_restore_flags; + } + } + bdrv_ref(base); mirror_start_job(bs, base, speed, 0, 0, on_error, on_error, cb, opaque, errp, &commit_active_job_driver, false, base); + if (error_is_set(errp)) { + goto error_restore_flags; + } + + return; + +error_restore_flags: + /* ignore error and errp for bdrv_reopen, because we want to propagate + * the original error */ + bdrv_reopen(base, orig_base_flags, NULL); + return; } diff --git a/block/qapi.c b/block/qapi.c index a32cb79..8f4134b 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -29,6 +29,60 @@ #include "qapi/qmp-output-visitor.h" #include "qapi/qmp/types.h" +BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs) +{ + BlockDeviceInfo *info = g_malloc0(sizeof(*info)); + + info->file = g_strdup(bs->filename); + info->ro = bs->read_only; + info->drv = g_strdup(bs->drv->format_name); + info->encrypted = bs->encrypted; + info->encryption_key_missing = bdrv_key_required(bs); + + if (bs->node_name[0]) { + info->has_node_name = true; + info->node_name = g_strdup(bs->node_name); + } + + if (bs->backing_file[0]) { + info->has_backing_file = true; + info->backing_file = g_strdup(bs->backing_file); + } + + info->backing_file_depth = bdrv_get_backing_file_depth(bs); + + if (bs->io_limits_enabled) { + ThrottleConfig cfg; + throttle_get_config(&bs->throttle_state, &cfg); + info->bps = cfg.buckets[THROTTLE_BPS_TOTAL].avg; + info->bps_rd = cfg.buckets[THROTTLE_BPS_READ].avg; + info->bps_wr = cfg.buckets[THROTTLE_BPS_WRITE].avg; + + info->iops = cfg.buckets[THROTTLE_OPS_TOTAL].avg; + info->iops_rd = cfg.buckets[THROTTLE_OPS_READ].avg; + info->iops_wr = cfg.buckets[THROTTLE_OPS_WRITE].avg; + + info->has_bps_max = cfg.buckets[THROTTLE_BPS_TOTAL].max; + info->bps_max = cfg.buckets[THROTTLE_BPS_TOTAL].max; + info->has_bps_rd_max = cfg.buckets[THROTTLE_BPS_READ].max; + info->bps_rd_max = cfg.buckets[THROTTLE_BPS_READ].max; + info->has_bps_wr_max = cfg.buckets[THROTTLE_BPS_WRITE].max; + info->bps_wr_max = cfg.buckets[THROTTLE_BPS_WRITE].max; + + info->has_iops_max = cfg.buckets[THROTTLE_OPS_TOTAL].max; + info->iops_max = cfg.buckets[THROTTLE_OPS_TOTAL].max; + info->has_iops_rd_max = cfg.buckets[THROTTLE_OPS_READ].max; + info->iops_rd_max = cfg.buckets[THROTTLE_OPS_READ].max; + info->has_iops_wr_max = cfg.buckets[THROTTLE_OPS_WRITE].max; + info->iops_wr_max = cfg.buckets[THROTTLE_OPS_WRITE].max; + + info->has_iops_size = cfg.op_size; + info->iops_size = cfg.op_size; + } + + return info; +} + /* * Returns 0 on success, with *p_list either set to describe snapshot * information, or NULL because there are no snapshots. Returns -errno on @@ -211,60 +265,7 @@ void bdrv_query_info(BlockDriverState *bs, if (bs->drv) { info->has_inserted = true; - info->inserted = g_malloc0(sizeof(*info->inserted)); - info->inserted->file = g_strdup(bs->filename); - info->inserted->ro = bs->read_only; - info->inserted->drv = g_strdup(bs->drv->format_name); - info->inserted->encrypted = bs->encrypted; - info->inserted->encryption_key_missing = bdrv_key_required(bs); - - if (bs->backing_file[0]) { - info->inserted->has_backing_file = true; - info->inserted->backing_file = g_strdup(bs->backing_file); - } - - info->inserted->backing_file_depth = bdrv_get_backing_file_depth(bs); - - if (bs->io_limits_enabled) { - ThrottleConfig cfg; - throttle_get_config(&bs->throttle_state, &cfg); - info->inserted->bps = cfg.buckets[THROTTLE_BPS_TOTAL].avg; - info->inserted->bps_rd = cfg.buckets[THROTTLE_BPS_READ].avg; - info->inserted->bps_wr = cfg.buckets[THROTTLE_BPS_WRITE].avg; - - info->inserted->iops = cfg.buckets[THROTTLE_OPS_TOTAL].avg; - info->inserted->iops_rd = cfg.buckets[THROTTLE_OPS_READ].avg; - info->inserted->iops_wr = cfg.buckets[THROTTLE_OPS_WRITE].avg; - - info->inserted->has_bps_max = - cfg.buckets[THROTTLE_BPS_TOTAL].max; - info->inserted->bps_max = - cfg.buckets[THROTTLE_BPS_TOTAL].max; - info->inserted->has_bps_rd_max = - cfg.buckets[THROTTLE_BPS_READ].max; - info->inserted->bps_rd_max = - cfg.buckets[THROTTLE_BPS_READ].max; - info->inserted->has_bps_wr_max = - cfg.buckets[THROTTLE_BPS_WRITE].max; - info->inserted->bps_wr_max = - cfg.buckets[THROTTLE_BPS_WRITE].max; - - info->inserted->has_iops_max = - cfg.buckets[THROTTLE_OPS_TOTAL].max; - info->inserted->iops_max = - cfg.buckets[THROTTLE_OPS_TOTAL].max; - info->inserted->has_iops_rd_max = - cfg.buckets[THROTTLE_OPS_READ].max; - info->inserted->iops_rd_max = - cfg.buckets[THROTTLE_OPS_READ].max; - info->inserted->has_iops_wr_max = - cfg.buckets[THROTTLE_OPS_WRITE].max; - info->inserted->iops_wr_max = - cfg.buckets[THROTTLE_OPS_WRITE].max; - - info->inserted->has_iops_size = cfg.op_size; - info->inserted->iops_size = cfg.op_size; - } + info->inserted = bdrv_block_device_info(bs); bs0 = bs; p_image_info = &info->inserted->image; @@ -318,6 +319,11 @@ BlockStats *bdrv_query_stats(const BlockDriverState *bs) s->parent = bdrv_query_stats(bs->file); } + if (bs->backing_hd) { + s->has_backing = true; + s->backing = bdrv_query_stats(bs->backing_hd); + } + return s; } diff --git a/block/qcow.c b/block/qcow.c index c470e05..948b0c5 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -691,7 +691,8 @@ static int qcow_create(const char *filename, QEMUOptionParameter *options, return ret; } - ret = bdrv_file_open(&qcow_bs, filename, NULL, BDRV_O_RDWR, &local_err); + ret = bdrv_file_open(&qcow_bs, filename, NULL, NULL, BDRV_O_RDWR, + &local_err); if (ret < 0) { qerror_report_err(local_err); error_free(local_err); diff --git a/block/qcow2.c b/block/qcow2.c index 8ec9db1..2da62b8 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -718,7 +718,6 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, } qemu_opts_del(opts); - bs->bl.write_zeroes_alignment = s->cluster_sectors; if (s->use_lazy_refcounts && s->qcow_version < 3) { error_setg(errp, "Lazy refcounts require a qcow2 image with at least " @@ -751,6 +750,15 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, return ret; } +static int qcow2_refresh_limits(BlockDriverState *bs) +{ + BDRVQcowState *s = bs->opaque; + + bs->bl.write_zeroes_alignment = s->cluster_sectors; + + return 0; +} + static int qcow2_set_key(BlockDriverState *bs, const char *key) { BDRVQcowState *s = bs->opaque; @@ -1483,7 +1491,7 @@ static int qcow2_create2(const char *filename, int64_t total_size, return ret; } - ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR, &local_err); + ret = bdrv_file_open(&bs, filename, NULL, NULL, BDRV_O_RDWR, &local_err); if (ret < 0) { error_propagate(errp, local_err); return ret; @@ -2268,6 +2276,7 @@ static BlockDriver bdrv_qcow2 = { .bdrv_change_backing_file = qcow2_change_backing_file, + .bdrv_refresh_limits = qcow2_refresh_limits, .bdrv_invalidate_cache = qcow2_invalidate_cache, .create_options = qcow2_create_options, diff --git a/block/qcow2.h b/block/qcow2.h index 303eb26..b5b7d13 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -340,11 +340,11 @@ typedef enum QCow2MetadataOverlap { #define QCOW2_OL_ALL \ (QCOW2_OL_CACHED | QCOW2_OL_INACTIVE_L2) -#define L1E_OFFSET_MASK 0x00ffffffffffff00ULL -#define L2E_OFFSET_MASK 0x00ffffffffffff00ULL +#define L1E_OFFSET_MASK 0x00fffffffffffe00ULL +#define L2E_OFFSET_MASK 0x00fffffffffffe00ULL #define L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL -#define REFT_OFFSET_MASK 0xffffffffffffff00ULL +#define REFT_OFFSET_MASK 0xfffffffffffffe00ULL static inline int64_t start_of_cluster(BDRVQcowState *s, int64_t offset) { diff --git a/block/qed.c b/block/qed.c index 450a1fa..694e6e2 100644 --- a/block/qed.c +++ b/block/qed.c @@ -495,7 +495,6 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags, } } - bs->bl.write_zeroes_alignment = s->header.cluster_size >> BDRV_SECTOR_BITS; s->need_check_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, qed_need_check_timer_cb, s); @@ -507,6 +506,15 @@ out: return ret; } +static int bdrv_qed_refresh_limits(BlockDriverState *bs) +{ + BDRVQEDState *s = bs->opaque; + + bs->bl.write_zeroes_alignment = s->header.cluster_size >> BDRV_SECTOR_BITS; + + return 0; +} + /* We have nothing to do for QED reopen, stubs just return * success */ static int bdrv_qed_reopen_prepare(BDRVReopenState *state, @@ -563,8 +571,8 @@ static int qed_create(const char *filename, uint32_t cluster_size, return ret; } - ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR | BDRV_O_CACHE_WB, - &local_err); + ret = bdrv_file_open(&bs, filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_CACHE_WB, &local_err); if (ret < 0) { qerror_report_err(local_err); error_free(local_err); @@ -1616,6 +1624,7 @@ static BlockDriver bdrv_qed = { .bdrv_truncate = bdrv_qed_truncate, .bdrv_getlength = bdrv_qed_getlength, .bdrv_get_info = bdrv_qed_get_info, + .bdrv_refresh_limits = bdrv_qed_refresh_limits, .bdrv_change_backing_file = bdrv_qed_change_backing_file, .bdrv_invalidate_cache = bdrv_qed_invalidate_cache, .bdrv_check = bdrv_qed_check, diff --git a/block/raw-posix.c b/block/raw-posix.c index 0676037..126a634 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -127,6 +127,8 @@ typedef struct BDRVRawState { int fd; int type; int open_flags; + size_t buf_align; + #if defined(__linux__) /* linux floppy specific */ int64_t fd_open_time; @@ -213,6 +215,76 @@ static int raw_normalize_devicepath(const char **filename) } #endif +static void raw_probe_alignment(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + char *buf; + unsigned int sector_size; + + /* For /dev/sg devices the alignment is not really used. + With buffered I/O, we don't have any restrictions. */ + if (bs->sg || !(s->open_flags & O_DIRECT)) { + bs->request_alignment = 1; + s->buf_align = 1; + return; + } + + /* Try a few ioctls to get the right size */ + bs->request_alignment = 0; + s->buf_align = 0; + +#ifdef BLKSSZGET + if (ioctl(s->fd, BLKSSZGET, §or_size) >= 0) { + bs->request_alignment = sector_size; + } +#endif +#ifdef DKIOCGETBLOCKSIZE + if (ioctl(s->fd, DKIOCGETBLOCKSIZE, §or_size) >= 0) { + bs->request_alignment = sector_size; + } +#endif +#ifdef DIOCGSECTORSIZE + if (ioctl(s->fd, DIOCGSECTORSIZE, §or_size) >= 0) { + bs->request_alignment = sector_size; + } +#endif +#ifdef CONFIG_XFS + if (s->is_xfs) { + struct dioattr da; + if (xfsctl(NULL, s->fd, XFS_IOC_DIOINFO, &da) >= 0) { + bs->request_alignment = da.d_miniosz; + /* The kernel returns wrong information for d_mem */ + /* s->buf_align = da.d_mem; */ + } + } +#endif + + /* If we could not get the sizes so far, we can only guess them */ + if (!s->buf_align) { + size_t align; + buf = qemu_memalign(MAX_BLOCKSIZE, 2 * MAX_BLOCKSIZE); + for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) { + if (pread(s->fd, buf + align, MAX_BLOCKSIZE, 0) >= 0) { + s->buf_align = align; + break; + } + } + qemu_vfree(buf); + } + + if (!bs->request_alignment) { + size_t align; + buf = qemu_memalign(s->buf_align, MAX_BLOCKSIZE); + for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) { + if (pread(s->fd, buf, align, 0) >= 0) { + bs->request_alignment = align; + break; + } + } + qemu_vfree(buf); + } +} + static void raw_parse_flags(int bdrv_flags, int *open_flags) { assert(open_flags != NULL); @@ -463,7 +535,6 @@ static int raw_reopen_prepare(BDRVReopenState *state, return ret; } - static void raw_reopen_commit(BDRVReopenState *state) { BDRVRawReopenState *raw_s = state->opaque; @@ -499,23 +570,15 @@ static void raw_reopen_abort(BDRVReopenState *state) state->opaque = NULL; } +static int raw_refresh_limits(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; -/* XXX: use host sector size if necessary with: -#ifdef DIOCGSECTORSIZE - { - unsigned int sectorsize = 512; - if (!ioctl(fd, DIOCGSECTORSIZE, §orsize) && - sectorsize > bufsize) - bufsize = sectorsize; - } -#endif -#ifdef CONFIG_COCOA - uint32_t blockSize = 512; - if ( !ioctl( fd, DKIOCGETBLOCKSIZE, &blockSize ) && blockSize > bufsize) { - bufsize = blockSize; - } -#endif -*/ + raw_probe_alignment(bs); + bs->bl.opt_mem_alignment = s->buf_align; + + return 0; +} static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb) { @@ -1363,6 +1426,7 @@ static BlockDriver bdrv_file = { .bdrv_aio_writev = raw_aio_writev, .bdrv_aio_flush = raw_aio_flush, .bdrv_aio_discard = raw_aio_discard, + .bdrv_refresh_limits = raw_refresh_limits, .bdrv_truncate = raw_truncate, .bdrv_getlength = raw_getlength, @@ -1740,6 +1804,7 @@ static BlockDriver bdrv_host_device = { .bdrv_aio_writev = raw_aio_writev, .bdrv_aio_flush = raw_aio_flush, .bdrv_aio_discard = hdev_aio_discard, + .bdrv_refresh_limits = raw_refresh_limits, .bdrv_truncate = raw_truncate, .bdrv_getlength = raw_getlength, @@ -1871,6 +1936,7 @@ static BlockDriver bdrv_host_floppy = { .bdrv_aio_readv = raw_aio_readv, .bdrv_aio_writev = raw_aio_writev, .bdrv_aio_flush = raw_aio_flush, + .bdrv_refresh_limits = raw_refresh_limits, .bdrv_truncate = raw_truncate, .bdrv_getlength = raw_getlength, @@ -1981,6 +2047,7 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_aio_readv = raw_aio_readv, .bdrv_aio_writev = raw_aio_writev, .bdrv_aio_flush = raw_aio_flush, + .bdrv_refresh_limits = raw_refresh_limits, .bdrv_truncate = raw_truncate, .bdrv_getlength = raw_getlength, @@ -2110,6 +2177,7 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_aio_readv = raw_aio_readv, .bdrv_aio_writev = raw_aio_writev, .bdrv_aio_flush = raw_aio_flush, + .bdrv_refresh_limits = raw_refresh_limits, .bdrv_truncate = raw_truncate, .bdrv_getlength = raw_getlength, diff --git a/block/raw-win32.c b/block/raw-win32.c index ce314fd..beb7f23 100644 --- a/block/raw-win32.c +++ b/block/raw-win32.c @@ -202,6 +202,35 @@ static int set_sparse(int fd) NULL, 0, NULL, 0, &returned, NULL); } +static void raw_probe_alignment(BlockDriverState *bs) +{ + BDRVRawState *s = bs->opaque; + DWORD sectorsPerCluster, freeClusters, totalClusters, count; + DISK_GEOMETRY_EX dg; + BOOL status; + + if (s->type == FTYPE_CD) { + bs->request_alignment = 2048; + return; + } + if (s->type == FTYPE_HARDDISK) { + status = DeviceIoControl(s->hfile, IOCTL_DISK_GET_DRIVE_GEOMETRY_EX, + NULL, 0, &dg, sizeof(dg), &count, NULL); + if (status != 0) { + bs->request_alignment = dg.Geometry.BytesPerSector; + return; + } + /* try GetDiskFreeSpace too */ + } + + if (s->drive_path[0]) { + GetDiskFreeSpace(s->drive_path, §orsPerCluster, + &dg.Geometry.BytesPerSector, + &freeClusters, &totalClusters); + bs->request_alignment = dg.Geometry.BytesPerSector; + } +} + static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped) { assert(access_flags != NULL); @@ -269,6 +298,17 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, } } + if (filename[0] && filename[1] == ':') { + snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", filename[0]); + } else if (filename[0] == '\\' && filename[1] == '\\') { + s->drive_path[0] = 0; + } else { + /* Relative path. */ + char buf[MAX_PATH]; + GetCurrentDirectory(MAX_PATH, buf); + snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", buf[0]); + } + s->hfile = CreateFile(filename, access_flags, FILE_SHARE_READ, NULL, OPEN_EXISTING, overlapped, NULL); @@ -293,6 +333,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, s->aio = aio; } + raw_probe_alignment(bs); ret = 0; fail: qemu_opts_del(opts); diff --git a/block/rbd.c b/block/rbd.c index f453f04..121fae2 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -95,18 +95,13 @@ typedef struct RADOSCB { #define RBD_FD_WRITE 1 typedef struct BDRVRBDState { - int fds[2]; rados_t cluster; rados_ioctx_t io_ctx; rbd_image_t image; char name[RBD_MAX_IMAGE_NAME_SIZE]; char *snap; - int event_reader_pos; - RADOSCB *event_rcb; } BDRVRBDState; -static void rbd_aio_bh_cb(void *opaque); - static int qemu_rbd_next_tok(char *dst, int dst_len, char *src, char delim, const char *name, @@ -369,9 +364,8 @@ static int qemu_rbd_create(const char *filename, QEMUOptionParameter *options, } /* - * This aio completion is being called from qemu_rbd_aio_event_reader() - * and runs in qemu context. It schedules a bh, but just in case the aio - * was not cancelled before. + * This aio completion is being called from rbd_finish_bh() and runs in qemu + * BH context. */ static void qemu_rbd_complete_aio(RADOSCB *rcb) { @@ -401,36 +395,19 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb) acb->ret = r; } } - /* Note that acb->bh can be NULL in case where the aio was cancelled */ - acb->bh = qemu_bh_new(rbd_aio_bh_cb, acb); - qemu_bh_schedule(acb->bh); - g_free(rcb); -} -/* - * aio fd read handler. It runs in the qemu context and calls the - * completion handling of completed rados aio operations. - */ -static void qemu_rbd_aio_event_reader(void *opaque) -{ - BDRVRBDState *s = opaque; + g_free(rcb); - ssize_t ret; + if (acb->cmd == RBD_AIO_READ) { + qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size); + } + qemu_vfree(acb->bounce); + acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret)); + acb->status = 0; - do { - char *p = (char *)&s->event_rcb; - - /* now read the rcb pointer that was sent from a non qemu thread */ - ret = read(s->fds[RBD_FD_READ], p + s->event_reader_pos, - sizeof(s->event_rcb) - s->event_reader_pos); - if (ret > 0) { - s->event_reader_pos += ret; - if (s->event_reader_pos == sizeof(s->event_rcb)) { - s->event_reader_pos = 0; - qemu_rbd_complete_aio(s->event_rcb); - } - } - } while (ret < 0 && errno == EINTR); + if (!acb->cancelled) { + qemu_aio_release(acb); + } } /* TODO Convert to fine grained options */ @@ -538,23 +515,9 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, bs->read_only = (s->snap != NULL); - s->event_reader_pos = 0; - r = qemu_pipe(s->fds); - if (r < 0) { - error_report("error opening eventfd"); - goto failed; - } - fcntl(s->fds[0], F_SETFL, O_NONBLOCK); - fcntl(s->fds[1], F_SETFL, O_NONBLOCK); - qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], qemu_rbd_aio_event_reader, - NULL, s); - - qemu_opts_del(opts); return 0; -failed: - rbd_close(s->image); failed_open: rados_ioctx_destroy(s->io_ctx); failed_shutdown: @@ -569,10 +532,6 @@ static void qemu_rbd_close(BlockDriverState *bs) { BDRVRBDState *s = bs->opaque; - close(s->fds[0]); - close(s->fds[1]); - qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], NULL, NULL, NULL); - rbd_close(s->image); rados_ioctx_destroy(s->io_ctx); g_free(s->snap); @@ -600,34 +559,11 @@ static const AIOCBInfo rbd_aiocb_info = { .cancel = qemu_rbd_aio_cancel, }; -static int qemu_rbd_send_pipe(BDRVRBDState *s, RADOSCB *rcb) +static void rbd_finish_bh(void *opaque) { - int ret = 0; - while (1) { - fd_set wfd; - int fd = s->fds[RBD_FD_WRITE]; - - /* send the op pointer to the qemu thread that is responsible - for the aio/op completion. Must do it in a qemu thread context */ - ret = write(fd, (void *)&rcb, sizeof(rcb)); - if (ret >= 0) { - break; - } - if (errno == EINTR) { - continue; - } - if (errno != EAGAIN) { - break; - } - - FD_ZERO(&wfd); - FD_SET(fd, &wfd); - do { - ret = select(fd + 1, NULL, &wfd, NULL, NULL); - } while (ret < 0 && errno == EINTR); - } - - return ret; + RADOSCB *rcb = opaque; + qemu_bh_delete(rcb->acb->bh); + qemu_rbd_complete_aio(rcb); } /* @@ -635,40 +571,18 @@ static int qemu_rbd_send_pipe(BDRVRBDState *s, RADOSCB *rcb) * * Note: this function is being called from a non qemu thread so * we need to be careful about what we do here. Generally we only - * write to the block notification pipe, and do the rest of the - * io completion handling from qemu_rbd_aio_event_reader() which - * runs in a qemu context. + * schedule a BH, and do the rest of the io completion handling + * from rbd_finish_bh() which runs in a qemu context. */ static void rbd_finish_aiocb(rbd_completion_t c, RADOSCB *rcb) { - int ret; + RBDAIOCB *acb = rcb->acb; + rcb->ret = rbd_aio_get_return_value(c); rbd_aio_release(c); - ret = qemu_rbd_send_pipe(rcb->s, rcb); - if (ret < 0) { - error_report("failed writing to acb->s->fds"); - g_free(rcb); - } -} - -/* Callback when all queued rbd_aio requests are complete */ -static void rbd_aio_bh_cb(void *opaque) -{ - RBDAIOCB *acb = opaque; - - if (acb->cmd == RBD_AIO_READ) { - qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size); - } - qemu_vfree(acb->bounce); - acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret)); - qemu_bh_delete(acb->bh); - acb->bh = NULL; - acb->status = 0; - - if (!acb->cancelled) { - qemu_aio_release(acb); - } + acb->bh = qemu_bh_new(rbd_finish_bh, rcb); + qemu_bh_schedule(acb->bh); } static int rbd_aio_discard_wrapper(rbd_image_t image, diff --git a/block/sheepdog.c b/block/sheepdog.c index b94ab6e..672b9c9 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -161,7 +161,7 @@ typedef struct SheepdogVdiReq { uint32_t id; uint32_t data_length; uint64_t vdi_size; - uint32_t vdi_id; + uint32_t base_vdi_id; uint8_t copies; uint8_t copy_policy; uint8_t reserved[2]; @@ -1493,7 +1493,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot) memset(&hdr, 0, sizeof(hdr)); hdr.opcode = SD_OP_NEW_VDI; - hdr.vdi_id = s->inode.vdi_id; + hdr.base_vdi_id = s->inode.vdi_id; wlen = SD_MAX_VDI_LEN; @@ -1534,7 +1534,7 @@ static int sd_prealloc(const char *filename) Error *local_err = NULL; int ret; - ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR, &local_err); + ret = bdrv_file_open(&bs, filename, NULL, NULL, BDRV_O_RDWR, &local_err); if (ret < 0) { qerror_report_err(local_err); error_free(local_err); @@ -1684,7 +1684,7 @@ static int sd_create(const char *filename, QEMUOptionParameter *options, if (backing_file) { BlockDriverState *bs; - BDRVSheepdogState *s; + BDRVSheepdogState *base; BlockDriver *drv; /* Currently, only Sheepdog backing image is supported. */ @@ -1695,22 +1695,22 @@ static int sd_create(const char *filename, QEMUOptionParameter *options, goto out; } - ret = bdrv_file_open(&bs, backing_file, NULL, 0, &local_err); + ret = bdrv_file_open(&bs, backing_file, NULL, NULL, 0, &local_err); if (ret < 0) { qerror_report_err(local_err); error_free(local_err); goto out; } - s = bs->opaque; + base = bs->opaque; - if (!is_snapshot(&s->inode)) { + if (!is_snapshot(&base->inode)) { error_report("cannot clone from a non snapshot vdi"); bdrv_unref(bs); ret = -EINVAL; goto out; } - + s->inode.vdi_id = base->inode.vdi_id; bdrv_unref(bs); } @@ -1743,7 +1743,7 @@ static void sd_close(BlockDriverState *bs) memset(&hdr, 0, sizeof(hdr)); hdr.opcode = SD_OP_RELEASE_VDI; - hdr.vdi_id = s->inode.vdi_id; + hdr.base_vdi_id = s->inode.vdi_id; wlen = strlen(s->name) + 1; hdr.data_length = wlen; hdr.flags = SD_FLAG_CMD_WRITE; @@ -1846,7 +1846,7 @@ static bool sd_delete(BDRVSheepdogState *s) unsigned int wlen = SD_MAX_VDI_LEN, rlen = 0; SheepdogVdiReq hdr = { .opcode = SD_OP_DEL_VDI, - .vdi_id = s->inode.vdi_id, + .base_vdi_id = s->inode.vdi_id, .data_length = wlen, .flags = SD_FLAG_CMD_WRITE, }; @@ -2442,11 +2442,12 @@ sd_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors, { BDRVSheepdogState *s = bs->opaque; SheepdogInode *inode = &s->inode; - unsigned long start = sector_num * BDRV_SECTOR_SIZE / SD_DATA_OBJ_SIZE, + uint64_t offset = sector_num * BDRV_SECTOR_SIZE; + unsigned long start = offset / SD_DATA_OBJ_SIZE, end = DIV_ROUND_UP((sector_num + nb_sectors) * BDRV_SECTOR_SIZE, SD_DATA_OBJ_SIZE); unsigned long idx; - int64_t ret = BDRV_BLOCK_DATA; + int64_t ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset; for (idx = start; idx < end; idx++) { if (inode->data_vdi_id[idx] == 0) { diff --git a/block/stream.c b/block/stream.c index 46bec7d..dd0b4ac 100644 --- a/block/stream.c +++ b/block/stream.c @@ -75,6 +75,8 @@ static void close_unused_images(BlockDriverState *top, BlockDriverState *base, unused->backing_hd = NULL; bdrv_unref(unused); } + + bdrv_refresh_limits(top); } static void coroutine_fn stream_run(void *opaque) diff --git a/block/vhdx.c b/block/vhdx.c index 1995778..9ee0a61 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -1797,7 +1797,7 @@ static int vhdx_create(const char *filename, QEMUOptionParameter *options, goto exit; } - ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR, &local_err); + ret = bdrv_file_open(&bs, filename, NULL, NULL, BDRV_O_RDWR, &local_err); if (ret < 0) { error_propagate(errp, local_err); goto exit; diff --git a/block/vmdk.c b/block/vmdk.c index c6b60b4..99ca60f 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -428,10 +428,6 @@ static int vmdk_add_extent(BlockDriverState *bs, extent->l2_size = l2_size; extent->cluster_sectors = flat ? sectors : cluster_sectors; - if (!flat) { - bs->bl.write_zeroes_alignment = - MAX(bs->bl.write_zeroes_alignment, cluster_sectors); - } if (s->num_extents > 1) { extent->end_sector = (*(extent - 1)).end_sector + extent->sectors; } else { @@ -640,6 +636,13 @@ static int vmdk_open_vmdk4(BlockDriverState *bs, if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) { l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9; } + if (bdrv_getlength(file) < + le64_to_cpu(header.grain_offset) * BDRV_SECTOR_SIZE) { + error_report("File truncated, expecting at least %lld bytes", + le64_to_cpu(header.grain_offset) * BDRV_SECTOR_SIZE); + return -EINVAL; + } + ret = vmdk_add_extent(bs, file, false, le64_to_cpu(header.capacity), le64_to_cpu(header.gd_offset) << 9, @@ -654,6 +657,10 @@ static int vmdk_open_vmdk4(BlockDriverState *bs, } extent->compressed = le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE; + if (extent->compressed) { + g_free(s->create_type); + s->create_type = g_strdup("streamOptimized"); + } extent->has_marker = le32_to_cpu(header.flags) & VMDK4_FLAG_MARKER; extent->version = le32_to_cpu(header.version); extent->has_zero_grain = le32_to_cpu(header.flags) & VMDK4_FLAG_ZERO_GRAIN; @@ -769,8 +776,8 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, path_combine(extent_path, sizeof(extent_path), desc_file_path, fname); - ret = bdrv_file_open(&extent_file, extent_path, NULL, bs->open_flags, - errp); + ret = bdrv_file_open(&extent_file, extent_path, NULL, NULL, + bs->open_flags, errp); if (ret) { return ret; } @@ -891,6 +898,23 @@ fail: return ret; } + +static int vmdk_refresh_limits(BlockDriverState *bs) +{ + BDRVVmdkState *s = bs->opaque; + int i; + + for (i = 0; i < s->num_extents; i++) { + if (!s->extents[i].flat) { + bs->bl.write_zeroes_alignment = + MAX(bs->bl.write_zeroes_alignment, + s->extents[i].cluster_sectors); + } + } + + return 0; +} + static int get_whole_cluster(BlockDriverState *bs, VmdkExtent *extent, uint64_t cluster_offset, @@ -1325,8 +1349,8 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num, { BDRVVmdkState *s = bs->opaque; VmdkExtent *extent = NULL; - int n, ret; - int64_t index_in_cluster; + int ret; + int64_t index_in_cluster, n; uint64_t extent_begin_sector, extent_relative_sector_num; uint64_t cluster_offset; VmdkMetaData m_data; @@ -1469,7 +1493,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, goto exit; } - ret = bdrv_file_open(&bs, filename, NULL, BDRV_O_RDWR, &local_err); + ret = bdrv_file_open(&bs, filename, NULL, NULL, BDRV_O_RDWR, &local_err); if (ret < 0) { error_propagate(errp, local_err); goto exit; @@ -1807,7 +1831,7 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options, goto exit; } } - ret = bdrv_file_open(&new_bs, filename, NULL, BDRV_O_RDWR, &local_err); + ret = bdrv_file_open(&new_bs, filename, NULL, NULL, BDRV_O_RDWR, &local_err); if (ret < 0) { error_setg_errno(errp, -ret, "Could not write description"); goto exit; @@ -2002,6 +2026,7 @@ static BlockDriver bdrv_vmdk = { .bdrv_get_allocated_file_size = vmdk_get_allocated_file_size, .bdrv_has_zero_init = vmdk_has_zero_init, .bdrv_get_specific_info = vmdk_get_specific_info, + .bdrv_refresh_limits = vmdk_refresh_limits, .create_options = vmdk_create_options, }; @@ -307,12 +307,11 @@ static bool check_throttle_config(ThrottleConfig *cfg, Error **errp) typedef enum { MEDIA_DISK, MEDIA_CDROM } DriveMediaType; /* Takes the ownership of bs_opts */ -static DriveInfo *blockdev_init(QDict *bs_opts, +static DriveInfo *blockdev_init(const char *file, QDict *bs_opts, BlockInterfaceType type, Error **errp) { const char *buf; - const char *file = NULL; const char *serial; int ro = 0; int bdrv_flags = 0; @@ -354,7 +353,6 @@ static DriveInfo *blockdev_init(QDict *bs_opts, ro = qemu_opt_get_bool(opts, "read-only", 0); copy_on_read = qemu_opt_get_bool(opts, "copy-on-read", false); - file = qemu_opt_get(opts, "file"); serial = qemu_opt_get(opts, "serial"); if ((buf = qemu_opt_get(opts, "discard")) != NULL) { @@ -599,6 +597,10 @@ QemuOptsList qemu_legacy_drive_opts = { .name = "addr", .type = QEMU_OPT_STRING, .help = "pci address (virtio only)", + },{ + .name = "file", + .type = QEMU_OPT_STRING, + .help = "file name", }, /* Options that are passed on, but have special semantics with -drive */ @@ -629,6 +631,7 @@ DriveInfo *drive_init(QemuOpts *all_opts, BlockInterfaceType block_default_type) const char *devaddr; bool read_only = false; bool copy_on_read; + const char *filename; Error *local_err = NULL; /* Change legacy command line options into QMP ones */ @@ -867,8 +870,10 @@ DriveInfo *drive_init(QemuOpts *all_opts, BlockInterfaceType block_default_type) } } + filename = qemu_opt_get(legacy_opts, "file"); + /* Actual block device init: Functionality shared with blockdev-add */ - dinfo = blockdev_init(bs_opts, type, &local_err); + dinfo = blockdev_init(filename, bs_opts, type, &local_err); if (dinfo == NULL) { if (error_is_set(&local_err)) { qerror_report_err(local_err); @@ -942,14 +947,22 @@ static void blockdev_do_action(int kind, void *data, Error **errp) qmp_transaction(&list, errp); } -void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, +void qmp_blockdev_snapshot_sync(bool has_device, const char *device, + bool has_node_name, const char *node_name, + const char *snapshot_file, + bool has_snapshot_node_name, + const char *snapshot_node_name, bool has_format, const char *format, - bool has_mode, enum NewImageMode mode, - Error **errp) + bool has_mode, NewImageMode mode, Error **errp) { BlockdevSnapshot snapshot = { + .has_device = has_device, .device = (char *) device, + .has_node_name = has_node_name, + .node_name = (char *) node_name, .snapshot_file = (char *) snapshot_file, + .has_snapshot_node_name = has_snapshot_node_name, + .snapshot_node_name = (char *) snapshot_node_name, .has_format = has_format, .format = (char *) format, .has_mode = has_mode, @@ -1187,8 +1200,14 @@ static void external_snapshot_prepare(BlkTransactionState *common, { BlockDriver *drv; int flags, ret; + QDict *options = NULL; Error *local_err = NULL; + bool has_device = false; const char *device; + bool has_node_name = false; + const char *node_name; + bool has_snapshot_node_name = false; + const char *snapshot_node_name; const char *new_image_file; const char *format = "qcow2"; enum NewImageMode mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; @@ -1199,7 +1218,14 @@ static void external_snapshot_prepare(BlkTransactionState *common, /* get parameters */ g_assert(action->kind == TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC); + has_device = action->blockdev_snapshot_sync->has_device; device = action->blockdev_snapshot_sync->device; + has_node_name = action->blockdev_snapshot_sync->has_node_name; + node_name = action->blockdev_snapshot_sync->node_name; + has_snapshot_node_name = + action->blockdev_snapshot_sync->has_snapshot_node_name; + snapshot_node_name = action->blockdev_snapshot_sync->snapshot_node_name; + new_image_file = action->blockdev_snapshot_sync->snapshot_file; if (action->blockdev_snapshot_sync->has_format) { format = action->blockdev_snapshot_sync->format; @@ -1215,9 +1241,21 @@ static void external_snapshot_prepare(BlkTransactionState *common, return; } - state->old_bs = bdrv_find(device); - if (!state->old_bs) { - error_set(errp, QERR_DEVICE_NOT_FOUND, device); + state->old_bs = bdrv_lookup_bs(has_device ? device : NULL, + has_node_name ? node_name : NULL, + &local_err); + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); + return; + } + + if (has_node_name && !has_snapshot_node_name) { + error_setg(errp, "New snapshot node name missing"); + return; + } + + if (has_snapshot_node_name && bdrv_find_node(snapshot_node_name)) { + error_setg(errp, "New snapshot node name already existing"); return; } @@ -1238,7 +1276,7 @@ static void external_snapshot_prepare(BlkTransactionState *common, } } - if (bdrv_check_ext_snapshot(state->old_bs) != EXT_SNAPSHOT_ALLOWED) { + if (!bdrv_is_first_non_filter(state->old_bs)) { error_set(errp, QERR_FEATURE_DISABLED, "snapshot"); return; } @@ -1257,15 +1295,23 @@ static void external_snapshot_prepare(BlkTransactionState *common, } } + if (has_snapshot_node_name) { + options = qdict_new(); + qdict_put(options, "node-name", + qstring_from_str(snapshot_node_name)); + } + /* We will manually add the backing_hd field to the bs later */ state->new_bs = bdrv_new(""); /* TODO Inherit bs->options or only take explicit options with an * extended QMP command? */ - ret = bdrv_open(state->new_bs, new_image_file, NULL, + ret = bdrv_open(state->new_bs, new_image_file, options, flags | BDRV_O_NO_BACKING, drv, &local_err); if (ret != 0) { error_propagate(errp, local_err); } + + QDECREF(options); } static void external_snapshot_commit(BlkTransactionState *common) @@ -1476,14 +1522,19 @@ void qmp_eject(const char *device, bool has_force, bool force, Error **errp) eject_device(bs, force, errp); } -void qmp_block_passwd(const char *device, const char *password, Error **errp) +void qmp_block_passwd(bool has_device, const char *device, + bool has_node_name, const char *node_name, + const char *password, Error **errp) { + Error *local_err = NULL; BlockDriverState *bs; int err; - bs = bdrv_find(device); - if (!bs) { - error_set(errp, QERR_DEVICE_NOT_FOUND, device); + bs = bdrv_lookup_bs(has_device ? device : NULL, + has_node_name ? node_name : NULL, + &local_err); + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); return; } @@ -1673,14 +1724,24 @@ int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data) return 0; } -void qmp_block_resize(const char *device, int64_t size, Error **errp) +void qmp_block_resize(bool has_device, const char *device, + bool has_node_name, const char *node_name, + int64_t size, Error **errp) { + Error *local_err = NULL; BlockDriverState *bs; int ret; - bs = bdrv_find(device); - if (!bs) { - error_set(errp, QERR_DEVICE_NOT_FOUND, device); + bs = bdrv_lookup_bs(has_device ? device : NULL, + has_node_name ? node_name : NULL, + &local_err); + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); + return; + } + + if (!bdrv_is_first_non_filter(bs)) { + error_set(errp, QERR_FEATURE_DISABLED, "resize"); return; } @@ -1947,6 +2008,11 @@ void qmp_drive_backup(const char *device, const char *target, } } +BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp) +{ + return bdrv_named_nodes_list(); +} + #define DEFAULT_MIRROR_BUF_SIZE (10 << 20) void qmp_drive_mirror(const char *device, const char *target, @@ -2210,7 +2276,7 @@ void qmp_blockdev_add(BlockdevOptions *options, Error **errp) qdict_flatten(qdict); - blockdev_init(qdict, IF_NONE, &local_err); + blockdev_init(NULL, qdict, IF_NONE, &local_err); if (error_is_set(&local_err)) { error_propagate(errp, local_err); goto fail; @@ -2251,10 +2317,6 @@ QemuOptsList qemu_common_drive_opts = { .type = QEMU_OPT_BOOL, .help = "enable/disable snapshot mode", },{ - .name = "file", - .type = QEMU_OPT_STRING, - .help = "disk image", - },{ .name = "discard", .type = QEMU_OPT_STRING, .help = "discard operation (ignore/off, unmap/on)", @@ -256,6 +256,7 @@ coroutine_pool="" seccomp="" glusterfs="" glusterfs_discard="no" +glusterfs_zerofill="no" virtio_blk_data_plane="" gtk="" gtkabi="2.0" @@ -2701,6 +2702,9 @@ if test "$glusterfs" != "no" ; then if $pkg_config --atleast-version=5 glusterfs-api; then glusterfs_discard="yes" fi + if $pkg_config --atleast-version=6 glusterfs-api; then + glusterfs_zerofill="yes" + fi else if test "$glusterfs" = "yes" ; then feature_not_found "GlusterFS backend support" @@ -4229,6 +4233,10 @@ if test "$glusterfs_discard" = "yes" ; then echo "CONFIG_GLUSTERFS_DISCARD=y" >> $config_host_mak fi +if test "$glusterfs_zerofill" = "yes" ; then + echo "CONFIG_GLUSTERFS_ZEROFILL=y" >> $config_host_mak +fi + if test "$libssh2" = "yes" ; then echo "CONFIG_LIBSSH2=y" >> $config_host_mak fi diff --git a/hmp-commands.hx b/hmp-commands.hx index feca084..f3fc514 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -35,6 +35,11 @@ STEXI @item commit @findex commit Commit changes to the disk images (if -snapshot is used) or backing files. +If the backing file is smaller than the snapshot, then the backing file will be +resized to be the same size as the snapshot. If the snapshot is smaller than +the backing file, the backing file will not be truncated. If you want the +backing file to match the size of the smaller snapshot, you can safely truncate +it yourself once the commit operation successfully completes. ETEXI { @@ -871,7 +871,7 @@ void hmp_block_passwd(Monitor *mon, const QDict *qdict) const char *password = qdict_get_str(qdict, "password"); Error *errp = NULL; - qmp_block_passwd(device, password, &errp); + qmp_block_passwd(true, device, false, NULL, password, &errp); hmp_handle_error(mon, &errp); } @@ -893,7 +893,7 @@ void hmp_block_resize(Monitor *mon, const QDict *qdict) int64_t size = qdict_get_int(qdict, "size"); Error *errp = NULL; - qmp_block_resize(device, size, &errp); + qmp_block_resize(true, device, false, NULL, size, &errp); hmp_handle_error(mon, &errp); } @@ -972,7 +972,9 @@ void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict) } mode = reuse ? NEW_IMAGE_MODE_EXISTING : NEW_IMAGE_MODE_ABSOLUTE_PATHS; - qmp_blockdev_snapshot_sync(device, filename, !!format, format, + qmp_blockdev_snapshot_sync(true, device, false, NULL, + filename, false, NULL, + !!format, format, true, mode, &errp); hmp_handle_error(mon, &errp); } @@ -1092,11 +1094,11 @@ void hmp_eject(Monitor *mon, const QDict *qdict) hmp_handle_error(mon, &err); } -static void hmp_change_read_arg(Monitor *mon, const char *password, - void *opaque) +static void hmp_change_read_arg(void *opaque, const char *password, + void *readline_opaque) { qmp_change_vnc_password(password, NULL); - monitor_read_command(mon, 1); + monitor_read_command(opaque, 1); } void hmp_change(Monitor *mon, const QDict *qdict) diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 19d0961..8a568e5 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -731,7 +731,7 @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp) register_savevm(dev, "virtio-blk", virtio_blk_id++, 2, virtio_blk_save, virtio_blk_load, s); bdrv_set_dev_ops(s->bs, &virtio_block_ops, s); - bdrv_set_buffer_alignment(s->bs, s->conf->logical_block_size); + bdrv_set_guest_block_size(s->bs, s->conf->logical_block_size); bdrv_iostatus_enable(s->bs); diff --git a/hw/ide/core.c b/hw/ide/core.c index e1f4c33..036cd4a 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -2103,7 +2103,7 @@ int ide_init_drive(IDEState *s, BlockDriverState *bs, IDEDriveKind kind, s->smart_selftest_count = 0; if (kind == IDE_CD) { bdrv_set_dev_ops(bs, &ide_cd_block_ops, s); - bdrv_set_buffer_alignment(bs, 2048); + bdrv_set_guest_block_size(bs, 2048); } else { if (!bdrv_is_inserted(s->bs)) { error_report("Device needs media, but drive is empty"); diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index bce617c..6491091 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -2254,7 +2254,7 @@ static int scsi_initfn(SCSIDevice *dev) } else { bdrv_set_dev_ops(s->qdev.conf.bs, &scsi_disk_block_ops, s); } - bdrv_set_buffer_alignment(s->qdev.conf.bs, s->qdev.blocksize); + bdrv_set_guest_block_size(s->qdev.conf.bs, s->qdev.blocksize); bdrv_iostatus_enable(s->qdev.conf.bs); add_boot_device_path(s->qdev.conf.bootindex, &dev->qdev, NULL); diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c index 8f195be..f08b64e1 100644 --- a/hw/scsi/scsi-generic.c +++ b/hw/scsi/scsi-generic.c @@ -210,7 +210,7 @@ static void scsi_read_complete(void * opaque, int ret) s->blocksize = ldl_be_p(&r->buf[8]); s->max_lba = ldq_be_p(&r->buf[0]); } - bdrv_set_buffer_alignment(s->conf.bs, s->blocksize); + bdrv_set_guest_block_size(s->conf.bs, s->blocksize); scsi_req_data(&r->req, len); if (!r->req.io_canceled) { diff --git a/hw/virtio/dataplane/vring.c b/hw/virtio/dataplane/vring.c index 250d45e..665a1ff 100644 --- a/hw/virtio/dataplane/vring.c +++ b/hw/virtio/dataplane/vring.c @@ -376,7 +376,7 @@ int vring_pop(VirtIODevice *vdev, Vring *vring, barrier(); if (desc.flags & VRING_DESC_F_INDIRECT) { - int ret = get_indirect(vring, elem, &desc); + ret = get_indirect(vring, elem, &desc); if (ret < 0) { goto out; } diff --git a/include/block/block.h b/include/block/block.h index 36efaea..963a61f 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -184,7 +184,11 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top); int bdrv_parse_cache_flags(const char *mode, int *flags); int bdrv_parse_discard_flags(const char *mode, int *flags); int bdrv_file_open(BlockDriverState **pbs, const char *filename, - QDict *options, int flags, Error **errp); + const char *reference, QDict *options, int flags, + Error **errp); +int bdrv_open_image(BlockDriverState **pbs, const char *filename, + QDict *options, const char *bdref_key, int flags, + bool force_raw, bool allow_none, Error **errp); int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp); int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options, int flags, BlockDriver *drv, Error **errp); @@ -220,7 +224,6 @@ BlockDriverAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs, int64_t sector_num int nb_sectors, BdrvRequestFlags flags, BlockDriverCompletionFunc *cb, void *opaque); int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags); -int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov); int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int count); int bdrv_pwrite(BlockDriverState *bs, int64_t offset, @@ -249,6 +252,7 @@ int bdrv_truncate(BlockDriverState *bs, int64_t offset); int64_t bdrv_getlength(BlockDriverState *bs); int64_t bdrv_get_allocated_file_size(BlockDriverState *bs); void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr); +int bdrv_refresh_limits(BlockDriverState *bs); int bdrv_commit(BlockDriverState *bs); int bdrv_commit_all(void); int bdrv_change_backing_file(BlockDriverState *bs, @@ -283,16 +287,16 @@ int bdrv_amend_options(BlockDriverState *bs_new, QEMUOptionParameter *options); /* external snapshots */ typedef enum { - EXT_SNAPSHOT_ALLOWED, - EXT_SNAPSHOT_FORBIDDEN, -} ExtSnapshotPerm; + BS_IS_A_FILTER, + BS_FILTER_PASS_DOWN, + BS_AUTHORIZATION_COUNT, +} BsAuthorization; -/* return EXT_SNAPSHOT_ALLOWED if external snapshot is allowed - * return EXT_SNAPSHOT_FORBIDDEN if external snapshot is forbidden - */ -ExtSnapshotPerm bdrv_check_ext_snapshot(BlockDriverState *bs); -/* helper used to forbid external snapshots like in blkverify */ -ExtSnapshotPerm bdrv_check_ext_snapshot_forbidden(BlockDriverState *bs); +bool bdrv_generic_is_first_non_filter(BlockDriverState *bs, + BlockDriverState *candidate); +bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs, + BlockDriverState *candidate); +bool bdrv_is_first_non_filter(BlockDriverState *candidate); /* async block I/O */ typedef void BlockDriverDirtyHandler(BlockDriverState *bs, int64_t sector, @@ -374,6 +378,11 @@ void bdrv_lock_medium(BlockDriverState *bs, bool locked); void bdrv_eject(BlockDriverState *bs, bool eject_flag); const char *bdrv_get_format_name(BlockDriverState *bs); BlockDriverState *bdrv_find(const char *name); +BlockDriverState *bdrv_find_node(const char *node_name); +BlockDeviceInfoList *bdrv_named_nodes_list(void); +BlockDriverState *bdrv_lookup_bs(const char *device, + const char *node_name, + Error **errp); BlockDriverState *bdrv_next(BlockDriverState *bs); void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque); @@ -418,7 +427,10 @@ void bdrv_img_create(const char *filename, const char *fmt, char *options, uint64_t img_size, int flags, Error **errp, bool quiet); -void bdrv_set_buffer_alignment(BlockDriverState *bs, int align); +/* Returns the alignment in bytes that is required so that no bounce buffer + * is required throughout the stack */ +size_t bdrv_opt_mem_align(BlockDriverState *bs); +void bdrv_set_guest_block_size(BlockDriverState *bs, int align); void *qemu_blockalign(BlockDriverState *bs, size_t size); bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov); @@ -515,6 +527,14 @@ typedef enum { BLKDBG_FLUSH_TO_OS, BLKDBG_FLUSH_TO_DISK, + BLKDBG_PWRITEV_RMW_HEAD, + BLKDBG_PWRITEV_RMW_AFTER_HEAD, + BLKDBG_PWRITEV_RMW_TAIL, + BLKDBG_PWRITEV_RMW_AFTER_TAIL, + BLKDBG_PWRITEV, + BLKDBG_PWRITEV_ZERO, + BLKDBG_PWRITEV_DONE, + BLKDBG_EVENT_MAX, } BlkDebugEvent; diff --git a/include/block/block_int.h b/include/block/block_int.h index 2772f2f..0bcf1c9 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -57,22 +57,35 @@ typedef struct BdrvTrackedRequest { BlockDriverState *bs; - int64_t sector_num; - int nb_sectors; + int64_t offset; + unsigned int bytes; bool is_write; + + bool serialising; + int64_t overlap_offset; + unsigned int overlap_bytes; + QLIST_ENTRY(BdrvTrackedRequest) list; Coroutine *co; /* owner, used for deadlock detection */ CoQueue wait_queue; /* coroutines blocked on this request */ + + struct BdrvTrackedRequest *waiting_for; } BdrvTrackedRequest; struct BlockDriver { const char *format_name; int instance_size; - /* if not defined external snapshots are allowed - * future block filters will query their children to build the response + /* this table of boolean contains authorizations for the block operations */ + bool authorizations[BS_AUTHORIZATION_COUNT]; + /* for snapshots complex block filter like Quorum can implement the + * following recursive callback instead of BS_IS_A_FILTER. + * It's purpose is to recurse on the filter children while calling + * bdrv_recurse_is_first_non_filter on them. + * For a sample implementation look in the future Quorum block filter. */ - ExtSnapshotPerm (*bdrv_check_ext_snapshot)(BlockDriverState *bs); + bool (*bdrv_recurse_is_first_non_filter)(BlockDriverState *bs, + BlockDriverState *candidate); int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename); int (*bdrv_probe_device)(const char *filename); @@ -226,6 +239,8 @@ struct BlockDriver { int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag); bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag); + int (*bdrv_refresh_limits)(BlockDriverState *bs); + /* * Returns 1 if newly created images are guaranteed to contain only * zeros, 0 otherwise. @@ -250,6 +265,9 @@ typedef struct BlockLimits { /* optimal transfer length in sectors */ int opt_transfer_length; + + /* memory alignment so that no bounce buffer is needed */ + size_t opt_mem_alignment; } BlockLimits; /* @@ -291,8 +309,8 @@ struct BlockDriverState { /* Callback before write request is processed */ NotifierWithReturnList before_write_notifiers; - /* number of in-flight copy-on-read requests */ - unsigned int copy_on_read_in_flight; + /* number of in-flight serialising requests */ + unsigned int serialising_in_flight; /* I/O throttling */ ThrottleState throttle_state; @@ -314,8 +332,11 @@ struct BlockDriverState { /* Whether produces zeros when read beyond eof */ bool zero_beyond_eof; - /* the memory alignment required for the buffers handled by this driver */ - int buffer_alignment; + /* Alignment requirement for offset/length of I/O requests */ + unsigned int request_alignment; + + /* the block size for which the guest device expects atomicity */ + int guest_block_size; /* do we need to tell the quest if we have a volatile write cache? */ int enable_write_cache; @@ -325,11 +346,18 @@ struct BlockDriverState { BlockdevOnError on_read_error, on_write_error; bool iostatus_enabled; BlockDeviceIoStatus iostatus; + + /* the following member gives a name to every node on the bs graph. */ + char node_name[32]; + /* element of the list of named nodes building the graph */ + QTAILQ_ENTRY(BlockDriverState) node_list; + /* Device name is the name associated with the "drive" the guest sees */ char device_name[32]; + /* element of the list of "drives" the guest sees */ + QTAILQ_ENTRY(BlockDriverState) device_list; QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps; int refcnt; int in_use; /* users other than guest access, eg. block migration */ - QTAILQ_ENTRY(BlockDriverState) list; QLIST_HEAD(, BdrvTrackedRequest) tracked_requests; diff --git a/include/block/qapi.h b/include/block/qapi.h index 9518ee4..e92c00d 100644 --- a/include/block/qapi.h +++ b/include/block/qapi.h @@ -29,6 +29,7 @@ #include "block/block.h" #include "block/snapshot.h" +BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs); int bdrv_query_snapshot_info_list(BlockDriverState *bs, SnapshotInfoList **p_list, Error **errp); diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h index 22d8b8f..7e5f752 100644 --- a/include/monitor/monitor.h +++ b/include/monitor/monitor.h @@ -5,7 +5,7 @@ #include "qapi/qmp/qerror.h" #include "qapi/qmp/qdict.h" #include "block/block.h" -#include "monitor/readline.h" +#include "qemu/readline.h" extern Monitor *cur_mon; extern Monitor *default_mon; diff --git a/include/qapi/qmp/qdict.h b/include/qapi/qmp/qdict.h index 5cefd80..1ddf97b 100644 --- a/include/qapi/qmp/qdict.h +++ b/include/qapi/qmp/qdict.h @@ -68,5 +68,6 @@ QDict *qdict_clone_shallow(const QDict *src); void qdict_flatten(QDict *qdict); void qdict_extract_subqdict(QDict *src, QDict **dst, const char *start); +void qdict_array_split(QDict *src, QList **dst); #endif /* QDICT_H */ diff --git a/include/qemu-io.h b/include/qemu-io.h index a418b46..7e7c07c 100644 --- a/include/qemu-io.h +++ b/include/qemu-io.h @@ -42,5 +42,8 @@ bool qemuio_command(BlockDriverState *bs, const char *cmd); void qemuio_add_command(const cmdinfo_t *ci); int qemuio_command_usage(const cmdinfo_t *ci); +void qemuio_complete_command(const char *input, + void (*fn)(const char *cmd, void *opaque), + void *opaque); #endif /* QEMU_IO_H */ diff --git a/include/qemu/config-file.h b/include/qemu/config-file.h index 508428f..dbd97c4 100644 --- a/include/qemu/config-file.h +++ b/include/qemu/config-file.h @@ -4,6 +4,7 @@ #include <stdio.h> #include "qemu/option.h" #include "qapi/error.h" +#include "qapi/qmp/qdict.h" QemuOptsList *qemu_find_opts(const char *group); QemuOptsList *qemu_find_opts_err(const char *group, Error **errp); @@ -18,6 +19,11 @@ int qemu_config_parse(FILE *fp, QemuOptsList **lists, const char *fname); int qemu_read_config_file(const char *filename); +/* Parse QDict options as a replacement for a config file (allowing multiple + enumerated (0..(n-1)) configuration "sections") */ +void qemu_config_parse_qdict(QDict *options, QemuOptsList **lists, + Error **errp); + /* Read default QEMU config files */ int qemu_read_default_config_files(bool userconfig); diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index b3e2b6d..eac7172 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -240,4 +240,6 @@ static inline void qemu_init_auxval(char **envp) { } void qemu_init_auxval(char **envp); #endif +void qemu_set_tty_echo(int fd, bool echo); + #endif diff --git a/include/monitor/readline.h b/include/qemu/readline.h index 0faf6e1..a89fe4a 100644 --- a/include/monitor/readline.h +++ b/include/qemu/readline.h @@ -1,14 +1,15 @@ #ifndef READLINE_H #define READLINE_H -#include "qemu-common.h" - #define READLINE_CMD_BUF_SIZE 4095 #define READLINE_MAX_CMDS 64 #define READLINE_MAX_COMPLETIONS 256 -typedef void ReadLineFunc(Monitor *mon, const char *str, void *opaque); -typedef void ReadLineCompletionFunc(Monitor *mon, +typedef void ReadLinePrintfFunc(void *opaque, const char *fmt, ...); +typedef void ReadLineFlushFunc(void *opaque); +typedef void ReadLineFunc(void *opaque, const char *str, + void *readline_opaque); +typedef void ReadLineCompletionFunc(void *opaque, const char *cmdline); typedef struct ReadLineState { @@ -35,7 +36,10 @@ typedef struct ReadLineState { void *readline_opaque; int read_password; char prompt[256]; - Monitor *mon; + + ReadLinePrintfFunc *printf_func; + ReadLineFlushFunc *flush_func; + void *opaque; } ReadLineState; void readline_add_completion(ReadLineState *rs, const char *str); @@ -46,11 +50,13 @@ const char *readline_get_history(ReadLineState *rs, unsigned int index); void readline_handle_byte(ReadLineState *rs, int ch); void readline_start(ReadLineState *rs, const char *prompt, int read_password, - ReadLineFunc *readline_func, void *opaque); + ReadLineFunc *readline_func, void *readline_opaque); void readline_restart(ReadLineState *rs); void readline_show_prompt(ReadLineState *rs); -ReadLineState *readline_init(Monitor *mon, +ReadLineState *readline_init(ReadLinePrintfFunc *printf_func, + ReadLineFlushFunc *flush_func, + void *opaque, ReadLineCompletionFunc *completion_finder); #endif /* !READLINE_H */ @@ -37,7 +37,7 @@ #include "ui/qemu-spice.h" #include "sysemu/sysemu.h" #include "monitor/monitor.h" -#include "monitor/readline.h" +#include "qemu/readline.h" #include "ui/console.h" #include "sysemu/blockdev.h" #include "audio/audio.h" @@ -217,8 +217,8 @@ static const mon_cmd_t qmp_cmds[]; Monitor *cur_mon; Monitor *default_mon; -static void monitor_command_cb(Monitor *mon, const char *cmdline, - void *opaque); +static void monitor_command_cb(void *opaque, const char *cmdline, + void *readline_opaque); static inline int qmp_cmd_mode(const Monitor *mon) { @@ -4338,9 +4338,10 @@ static void monitor_find_completion_by_table(Monitor *mon, } } -static void monitor_find_completion(Monitor *mon, +static void monitor_find_completion(void *opaque, const char *cmdline) { + Monitor *mon = opaque; char *args[MAX_ARGS]; int nb_args, len; @@ -4751,8 +4752,11 @@ static void monitor_read(void *opaque, const uint8_t *buf, int size) cur_mon = old_mon; } -static void monitor_command_cb(Monitor *mon, const char *cmdline, void *opaque) +static void monitor_command_cb(void *opaque, const char *cmdline, + void *readline_opaque) { + Monitor *mon = opaque; + monitor_suspend(mon); handle_user_command(mon, cmdline); monitor_resume(mon); @@ -4881,6 +4885,22 @@ static void sortcmdlist(void) * End: */ +/* These functions just adapt the readline interface in a typesafe way. We + * could cast function pointers but that discards compiler checks. + */ +static void monitor_readline_printf(void *opaque, const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + monitor_vprintf(opaque, fmt, ap); + va_end(ap); +} + +static void monitor_readline_flush(void *opaque) +{ + monitor_flush(opaque); +} + void monitor_init(CharDriverState *chr, int flags) { static int is_first_init = 1; @@ -4898,7 +4918,10 @@ void monitor_init(CharDriverState *chr, int flags) mon->chr = chr; mon->flags = flags; if (flags & MONITOR_USE_READLINE) { - mon->rs = readline_init(mon, monitor_find_completion); + mon->rs = readline_init(monitor_readline_printf, + monitor_readline_flush, + mon, + monitor_find_completion); monitor_read_command(mon, 0); } @@ -4920,9 +4943,11 @@ void monitor_init(CharDriverState *chr, int flags) default_mon = mon; } -static void bdrv_password_cb(Monitor *mon, const char *password, void *opaque) +static void bdrv_password_cb(void *opaque, const char *password, + void *readline_opaque) { - BlockDriverState *bs = opaque; + Monitor *mon = opaque; + BlockDriverState *bs = readline_opaque; int ret = 0; if (bdrv_set_key(bs, password) != 0) { diff --git a/qapi-schema.json b/qapi-schema.json index f27c48a..05ced9d 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -810,6 +810,8 @@ # # @file: the filename of the backing device # +# @node-name: #optional the name of the block driver node (Since 2.0) +# # @ro: true if the backing device was open read-only # # @drv: the name of the block format used to open the backing device. As of @@ -857,10 +859,9 @@ # # Since: 0.14.0 # -# Notes: This interface is only found in @BlockInfo. ## { 'type': 'BlockDeviceInfo', - 'data': { 'file': 'str', 'ro': 'bool', 'drv': 'str', + 'data': { 'file': 'str', '*node-name': 'str', 'ro': 'bool', 'drv': 'str', '*backing_file': 'str', 'backing_file_depth': 'int', 'encrypted': 'bool', 'encryption_key_missing': 'bool', 'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int', @@ -1022,15 +1023,17 @@ # # @stats: A @BlockDeviceStats for the device. # -# @parent: #optional This may point to the backing block device if this is a -# a virtual block device. If it's a backing block, this will point -# to the backing file is one is present. +# @parent: #optional This describes the file block device if it has one. +# +# @backing: #optional This describes the backing block device if it has one. +# (Since 2.0) # # Since: 0.14.0 ## { 'type': 'BlockStats', 'data': {'*device': 'str', 'stats': 'BlockDeviceStats', - '*parent': 'BlockStats'} } + '*parent': 'BlockStats', + '*backing': 'BlockStats'} } ## # @query-blockstats: @@ -1675,7 +1678,11 @@ # determine which ones are encrypted, set the passwords with this command, and # then start the guest with the @cont command. # -# @device: the name of the device to set the password on +# Either @device or @node-name must be set but not both. +# +# @device: #optional the name of the block backend device to set the password on +# +# @node-name: #optional graph node name to set the password on (Since 2.0) # # @password: the password to use for the device # @@ -1689,7 +1696,8 @@ # # Since: 0.14.0 ## -{ 'command': 'block_passwd', 'data': {'device': 'str', 'password': 'str'} } +{ 'command': 'block_passwd', 'data': {'*device': 'str', + '*node-name': 'str', 'password': 'str'} } ## # @balloon: @@ -1716,7 +1724,11 @@ # # Resize a block image while a guest is running. # -# @device: the name of the device to get the image resized +# Either @device or @node-name must be set but not both. +# +# @device: #optional the name of the device to get the image resized +# +# @node-name: #optional graph node name to get the image resized (Since 2.0) # # @size: new image size in bytes # @@ -1725,7 +1737,9 @@ # # Since: 0.14.0 ## -{ 'command': 'block_resize', 'data': { 'device': 'str', 'size': 'int' }} +{ 'command': 'block_resize', 'data': { '*device': 'str', + '*node-name': 'str', + 'size': 'int' }} ## # @NewImageMode @@ -1747,18 +1761,25 @@ ## # @BlockdevSnapshot # -# @device: the name of the device to generate the snapshot from. +# Either @device or @node-name must be set but not both. +# +# @device: #optional the name of the device to generate the snapshot from. +# +# @node-name: #optional graph node name to generate the snapshot from (Since 2.0) # # @snapshot-file: the target of the new image. A new file will be created. # +# @snapshot-node-name: #optional the graph node name of the new image (Since 2.0) +# # @format: #optional the format of the snapshot image, default is 'qcow2'. # # @mode: #optional whether and how QEMU should create a new image, default is # 'absolute-paths'. ## { 'type': 'BlockdevSnapshot', - 'data': { 'device': 'str', 'snapshot-file': 'str', '*format': 'str', - '*mode': 'NewImageMode' } } + 'data': { '*device': 'str', '*node-name': 'str', + 'snapshot-file': 'str', '*snapshot-node-name': 'str', + '*format': 'str', '*mode': 'NewImageMode' } } ## # @BlockdevSnapshotInternal @@ -1973,6 +1994,13 @@ # user needs to complete the job with the block-job-complete # command after getting the ready event. (Since 2.0) # +# If the base image is smaller than top, then the base image +# will be resized to be the same size as top. If top is +# smaller than the base image, the base will not be +# truncated. If you want the base image size to match the +# size of the smaller top, you can safely truncate it +# yourself once the commit operation successfully completes. +# # # @speed: #optional the maximum speed, in bytes per second # @@ -2009,6 +2037,17 @@ { 'command': 'drive-backup', 'data': 'DriveBackup' } ## +# @query-named-block-nodes +# +# Get the named block driver list +# +# Returns: the list of BlockDeviceInfo +# +# Since 2.0 +## +{ 'command': 'query-named-block-nodes', 'returns': [ 'BlockDeviceInfo' ] } + +## # @drive-mirror # # Start mirroring a block device's writes to a new destination. @@ -4090,6 +4129,7 @@ # @id: #optional id by which the new block device can be referred to. # This is a required option on the top level of blockdev-add, and # currently not allowed on any other level. +# @node-name: #optional the name of a block driver state node (Since 2.0) # @discard: #optional discard-related options (default: ignore) # @cache: #optional cache-related options # @aio: #optional AIO backend (default: threads) @@ -4105,6 +4145,7 @@ { 'type': 'BlockdevOptionsBase', 'data': { 'driver': 'str', '*id': 'str', + '*node-name': 'str', '*discard': 'BlockdevDiscardOptions', '*cache': 'BlockdevCacheOptions', '*aio': 'BlockdevAioOptions', @@ -4201,6 +4242,116 @@ '*pass-discard-other': 'bool' } } ## +# @BlkdebugEvent +# +# Trigger events supported by blkdebug. +## +{ 'enum': 'BlkdebugEvent', + 'data': [ 'l1_update', 'l1_grow.alloc_table', 'l1_grow.write_table', + 'l1_grow.activate_table', 'l2_load', 'l2_update', + 'l2_update_compressed', 'l2_alloc.cow_read', 'l2_alloc.write', + 'read_aio', 'read_backing_aio', 'read_compressed', 'write_aio', + 'write_compressed', 'vmstate_load', 'vmstate_save', 'cow_read', + 'cow_write', 'reftable_load', 'reftable_grow', 'reftable_update', + 'refblock_load', 'refblock_update', 'refblock_update_part', + 'refblock_alloc', 'refblock_alloc.hookup', 'refblock_alloc.write', + 'refblock_alloc.write_blocks', 'refblock_alloc.write_table', + 'refblock_alloc.switch_table', 'cluster_alloc', + 'cluster_alloc_bytes', 'cluster_free', 'flush_to_os', + 'flush_to_disk' ] } + +## +# @BlkdebugInjectErrorOptions +# +# Describes a single error injection for blkdebug. +# +# @event: trigger event +# +# @state: #optional the state identifier blkdebug needs to be in to +# actually trigger the event; defaults to "any" +# +# @errno: #optional error identifier (errno) to be returned; defaults to +# EIO +# +# @sector: #optional specifies the sector index which has to be affected +# in order to actually trigger the event; defaults to "any +# sector" +# +# @once: #optional disables further events after this one has been +# triggered; defaults to false +# +# @immediately: #optional fail immediately; defaults to false +# +# Since: 2.0 +## +{ 'type': 'BlkdebugInjectErrorOptions', + 'data': { 'event': 'BlkdebugEvent', + '*state': 'int', + '*errno': 'int', + '*sector': 'int', + '*once': 'bool', + '*immediately': 'bool' } } + +## +# @BlkdebugSetStateOptions +# +# Describes a single state-change event for blkdebug. +# +# @event: trigger event +# +# @state: #optional the current state identifier blkdebug needs to be in; +# defaults to "any" +# +# @new_state: the state identifier blkdebug is supposed to assume if +# this event is triggered +# +# Since: 2.0 +## +{ 'type': 'BlkdebugSetStateOptions', + 'data': { 'event': 'BlkdebugEvent', + '*state': 'int', + 'new_state': 'int' } } + +## +# @BlockdevOptionsBlkdebug +# +# Driver specific block device options for blkdebug. +# +# @image: underlying raw block device (or image file) +# +# @config: #optional filename of the configuration file +# +# @align: #optional required alignment for requests in bytes +# +# @inject-error: #optional array of error injection descriptions +# +# @set-state: #optional array of state-change descriptions +# +# Since: 2.0 +## +{ 'type': 'BlockdevOptionsBlkdebug', + 'data': { 'image': 'BlockdevRef', + '*config': 'str', + '*align': 'int', + '*inject-error': ['BlkdebugInjectErrorOptions'], + '*set-state': ['BlkdebugSetStateOptions'] } } + +## +# @BlockdevOptionsBlkverify +# +# Driver specific block device options for blkverify. +# +# @test: block device to be tested +# +# @raw: raw image used for verification +# +# Since: 2.0 +## +{ 'type': 'BlockdevOptionsBlkverify', + 'data': { 'test': 'BlockdevRef', + 'raw': 'BlockdevRef' } } + +## # @BlockdevOptions # # Options for creating a block device. @@ -4224,10 +4375,8 @@ # TODO sheepdog: Wait for structured options # TODO ssh: Should take InetSocketAddress for 'host'? 'vvfat': 'BlockdevOptionsVVFAT', - -# TODO blkdebug: Wait for structured options -# TODO blkverify: Wait for structured options - + 'blkdebug': 'BlockdevOptionsBlkdebug', + 'blkverify': 'BlockdevOptionsBlkverify', 'bochs': 'BlockdevOptionsGenericFormat', 'cloop': 'BlockdevOptionsGenericFormat', 'cow': 'BlockdevOptionsGenericCOWFormat', diff --git a/qemu-doc.texi b/qemu-doc.texi index 4e9c6e9..ce61f30 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -536,11 +536,11 @@ support of multiple VM snapshots. Supported options: @table @code @item compat -Determines the qcow2 version to use. @code{compat=0.10} uses the traditional -image format that can be read by any QEMU since 0.10 (this is the default). +Determines the qcow2 version to use. @code{compat=0.10} uses the +traditional image format that can be read by any QEMU since 0.10. @code{compat=1.1} enables image format extensions that only QEMU 1.1 and -newer understand. Amongst others, this includes zero clusters, which allow -efficient copy-on-read for sparse images. +newer understand (this is the default). Amongst others, this includes +zero clusters, which allow efficient copy-on-read for sparse images. @item backing_file File name of a base image (see @option{create} subcommand) diff --git a/qemu-img.texi b/qemu-img.texi index 1bba91e..526d56a 100644 --- a/qemu-img.texi +++ b/qemu-img.texi @@ -57,7 +57,9 @@ indicates that target image must be compressed (qcow format only) @item -h with or without a command shows help and lists the supported formats @item -p -display progress bar (convert and rebase commands only) +display progress bar (compare, convert and rebase commands only). +If the @var{-p} option is not used for a command that supports it, the +progress is reported when the process receives a @code{SIGUSR1} signal. @item -q Quiet mode - do not print any output (except errors). There's no progress bar in case both @var{-q} and @var{-p} options are used. @@ -140,7 +142,12 @@ it doesn't need to be specified separately in this case. @item commit [-f @var{fmt}] [-t @var{cache}] @var{filename} -Commit the changes recorded in @var{filename} in its base image. +Commit the changes recorded in @var{filename} in its base image or backing file. +If the backing file is smaller than the snapshot, then the backing file will be +resized to be the same size as the snapshot. If the snapshot is smaller than +the backing file, the backing file will not be truncated. If you want the +backing file to match the size of the smaller snapshot, you can safely truncate +it yourself once the commit operation successfully completes. @item compare [-f @var{fmt}] [-F @var{fmt}] [-p] [-s] [-q] @var{filename1} @var{filename2} @@ -391,11 +398,11 @@ support of multiple VM snapshots. Supported options: @table @code @item compat -Determines the qcow2 version to use. @code{compat=0.10} uses the traditional -image format that can be read by any QEMU since 0.10 (this is the default). +Determines the qcow2 version to use. @code{compat=0.10} uses the +traditional image format that can be read by any QEMU since 0.10. @code{compat=1.1} enables image format extensions that only QEMU 1.1 and -newer understand. Amongst others, this includes zero clusters, which allow -efficient copy-on-read for sparse images. +newer understand (this is the default). Amongst others, this includes zero +clusters, which allow efficient copy-on-read for sparse images. @item backing_file File name of a base image (see @option{create} subcommand) diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index 85e4982..f1de24c 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -12,6 +12,7 @@ #include "block/block_int.h" #include "block/qapi.h" #include "qemu/main-loop.h" +#include "qemu/timer.h" #define CMD_NOFILE_OK 0x01 @@ -94,6 +95,21 @@ static const cmdinfo_t *find_command(const char *cmd) return NULL; } +/* Invoke fn() for commands with a matching prefix */ +void qemuio_complete_command(const char *input, + void (*fn)(const char *cmd, void *opaque), + void *opaque) +{ + cmdinfo_t *ct; + size_t input_len = strlen(input); + + for (ct = cmdtab; ct < &cmdtab[ncmds]; ct++) { + if (strncmp(input, ct->name, input_len) == 0) { + fn(ct->name, opaque); + } + } +} + static char **breakline(char *input, int *count) { int c = 0; @@ -2038,6 +2054,46 @@ static const cmdinfo_t abort_cmd = { .oneline = "simulate a program crash using abort(3)", }; +static void sleep_cb(void *opaque) +{ + bool *expired = opaque; + *expired = true; +} + +static int sleep_f(BlockDriverState *bs, int argc, char **argv) +{ + char *endptr; + long ms; + struct QEMUTimer *timer; + bool expired = false; + + ms = strtol(argv[1], &endptr, 0); + if (ms < 0 || *endptr != '\0') { + printf("%s is not a valid number\n", argv[1]); + return 0; + } + + timer = timer_new_ns(QEMU_CLOCK_HOST, sleep_cb, &expired); + timer_mod(timer, qemu_clock_get_ns(QEMU_CLOCK_HOST) + SCALE_MS * ms); + + while (!expired) { + main_loop_wait(false); + } + + timer_free(timer); + + return 0; +} + +static const cmdinfo_t sleep_cmd = { + .name = "sleep", + .argmin = 1, + .argmax = 1, + .cfunc = sleep_f, + .flags = CMD_NOFILE_OK, + .oneline = "waits for the given value in milliseconds", +}; + static void help_oneline(const char *cmd, const cmdinfo_t *ct) { if (cmd) { @@ -2151,4 +2207,5 @@ static void __attribute((constructor)) init_qemuio_commands(void) qemuio_add_command(&resume_cmd); qemuio_add_command(&wait_break_cmd); qemuio_add_command(&abort_cmd); + qemuio_add_command(&sleep_cmd); } @@ -18,6 +18,7 @@ #include "qemu/main-loop.h" #include "qemu/option.h" #include "qemu/config-file.h" +#include "qemu/readline.h" #include "block/block_int.h" #include "trace/control.h" @@ -32,6 +33,8 @@ extern int qemuio_misalign; static int ncmdline; static char **cmdline; +static ReadLineState *readline_state; + static int close_f(BlockDriverState *bs, int argc, char **argv) { bdrv_unref(bs); @@ -56,7 +59,7 @@ static int openfile(char *name, int flags, int growable, QDict *opts) } if (growable) { - if (bdrv_file_open(&qemuio_bs, name, opts, flags, &local_err)) { + if (bdrv_file_open(&qemuio_bs, name, NULL, opts, flags, &local_err)) { fprintf(stderr, "%s: can't open device %s: %s\n", progname, name, error_get_pretty(local_err)); error_free(local_err); @@ -160,11 +163,13 @@ static int open_f(BlockDriverState *bs, int argc, char **argv) flags |= BDRV_O_RDWR; } - if (optind != argc - 1) { + if (optind == argc - 1) { + return openfile(argv[optind], flags, growable, opts); + } else if (optind == argc) { + return openfile(NULL, flags, growable, opts); + } else { return qemuio_command_usage(&open_cmd); } - - return openfile(argv[optind], flags, growable, opts); } static int quit_f(BlockDriverState *bs, int argc, char **argv) @@ -203,14 +208,6 @@ static void usage(const char *name) name); } - -#if defined(ENABLE_READLINE) -# include <readline/history.h> -# include <readline/readline.h> -#elif defined(ENABLE_EDITLINE) -# include <histedit.h> -#endif - static char *get_prompt(void) { static char prompt[FILENAME_MAX + 2 /*"> "*/ + 1 /*"\0"*/ ]; @@ -222,52 +219,53 @@ static char *get_prompt(void) return prompt; } -#if defined(ENABLE_READLINE) -static char *fetchline(void) +static void readline_printf_func(void *opaque, const char *fmt, ...) { - char *line = readline(get_prompt()); - if (line && *line) { - add_history(line); - } - return line; + va_list ap; + va_start(ap, fmt); + vprintf(fmt, ap); + va_end(ap); } -#elif defined(ENABLE_EDITLINE) -static char *el_get_prompt(EditLine *e) + +static void readline_flush_func(void *opaque) { - return get_prompt(); + fflush(stdout); } -static char *fetchline(void) +static void readline_func(void *opaque, const char *str, void *readline_opaque) { - static EditLine *el; - static History *hist; - HistEvent hevent; - char *line; - int count; - - if (!el) { - hist = history_init(); - history(hist, &hevent, H_SETSIZE, 100); - el = el_init(progname, stdin, stdout, stderr); - el_source(el, NULL); - el_set(el, EL_SIGNAL, 1); - el_set(el, EL_PROMPT, el_get_prompt); - el_set(el, EL_HIST, history, (const char *)hist); - } - line = strdup(el_gets(el, &count)); - if (line) { - if (count > 0) { - line[count-1] = '\0'; - } - if (*line) { - history(hist, &hevent, H_ENTER, line); + char **line = readline_opaque; + *line = g_strdup(str); +} + +static void completion_match(const char *cmd, void *opaque) +{ + readline_add_completion(readline_state, cmd); +} + +static void readline_completion_func(void *opaque, const char *str) +{ + readline_set_completion_index(readline_state, strlen(str)); + qemuio_complete_command(str, completion_match, NULL); +} + +static char *fetchline_readline(void) +{ + char *line = NULL; + + readline_start(readline_state, get_prompt(), 0, readline_func, &line); + while (!line) { + int ch = getchar(); + if (ch == EOF) { + break; } + readline_handle_byte(readline_state, ch); } return line; } -#else -# define MAXREADLINESZ 1024 -static char *fetchline(void) + +#define MAXREADLINESZ 1024 +static char *fetchline_fgets(void) { char *p, *line = g_malloc(MAXREADLINESZ); @@ -283,7 +281,15 @@ static char *fetchline(void) return line; } -#endif + +static char *fetchline(void) +{ + if (readline_state) { + return fetchline_readline(); + } else { + return fetchline_fgets(); + } +} static void prep_fetchline(void *opaque) { @@ -339,6 +345,11 @@ static void add_user_command(char *optarg) cmdline[ncmdline-1] = optarg; } +static void reenable_tty_echo(void) +{ + qemu_set_tty_echo(STDIN_FILENO, true); +} + int main(int argc, char **argv) { int readonly = 0; @@ -435,6 +446,15 @@ int main(int argc, char **argv) qemuio_add_command(&open_cmd); qemuio_add_command(&close_cmd); + if (isatty(STDIN_FILENO)) { + readline_state = readline_init(readline_printf_func, + readline_flush_func, + NULL, + readline_completion_func); + qemu_set_tty_echo(STDIN_FILENO, false); + atexit(reenable_tty_echo); + } + /* open the device */ if (!readonly) { flags |= BDRV_O_RDWR; @@ -453,5 +473,6 @@ int main(int argc, char **argv) if (qemuio_bs) { bdrv_unref(qemuio_bs); } + g_free(readline_state); return 0; } diff --git a/qmp-commands.hx b/qmp-commands.hx index 02cc815..cce6b81 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -931,7 +931,7 @@ EQMP { .name = "block_resize", - .args_type = "device:B,size:o", + .args_type = "device:s?,node-name:s?,size:o", .mhandler.cmd_new = qmp_marshal_input_block_resize, }, @@ -944,6 +944,7 @@ Resize a block image while a guest is running. Arguments: - "device": the device's ID, must be unique (json-string) +- "node-name": the node name in the block driver state graph (json-string) - "size": new size Example: @@ -965,6 +966,45 @@ EQMP .mhandler.cmd_new = qmp_marshal_input_block_commit, }, +SQMP +block-commit +------------ + +Live commit of data from overlay image nodes into backing nodes - i.e., writes +data between 'top' and 'base' into 'base'. + +Arguments: + +- "device": The device's ID, must be unique (json-string) +- "base": The file name of the backing image to write data into. + If not specified, this is the deepest backing image + (json-string, optional) +- "top": The file name of the backing image within the image chain, + which contains the topmost data to be committed down. + + If top == base, that is an error. + If top == active, the job will not be completed by itself, + user needs to complete the job with the block-job-complete + command after getting the ready event. (Since 2.0) + + If the base image is smaller than top, then the base image + will be resized to be the same size as top. If top is + smaller than the base image, the base will not be + truncated. If you want the base image size to match the + size of the smaller top, you can safely truncate it + yourself once the commit operation successfully completes. + (json-string) +- "speed": the maximum speed, in bytes per second (json-int, optional) + + +Example: + +-> { "execute": "block-commit", "arguments": { "device": "virtio0", + "top": "/tmp/snap1.qcow2" } } +<- { "return": {} } + +EQMP + { .name = "drive-backup", .args_type = "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?," @@ -1088,7 +1128,9 @@ actions array: - "data": a dictionary. The contents depend on the value of "type". When "type" is "blockdev-snapshot-sync": - "device": device name to snapshot (json-string) + - "node-name": graph node name to snapshot (json-string) - "snapshot-file": name of new image file (json-string) + - "snapshot-node-name": graph node name of the new snapshot (json-string) - "format": format of new image (json-string, optional) - "mode": whether and how QEMU should create the snapshot file (NewImageMode, optional, default "absolute-paths") @@ -1103,6 +1145,11 @@ Example: { 'type': 'blockdev-snapshot-sync', 'data' : { "device": "ide-hd0", "snapshot-file": "/some/place/my-image", "format": "qcow2" } }, + { 'type': 'blockdev-snapshot-sync', 'data' : { "node-name": "myfile", + "snapshot-file": "/some/place/my-image2", + "snapshot-node-name": "node3432", + "mode": "existing", + "format": "qcow2" } }, { 'type': 'blockdev-snapshot-sync', 'data' : { "device": "ide-hd1", "snapshot-file": "/some/place/my-image2", "mode": "existing", @@ -1116,7 +1163,7 @@ EQMP { .name = "blockdev-snapshot-sync", - .args_type = "device:B,snapshot-file:s,format:s?,mode:s?", + .args_type = "device:s?,node-name:s?,snapshot-file:s,snapshot-node-name:s?,format:s?,mode:s?", .mhandler.cmd_new = qmp_marshal_input_blockdev_snapshot_sync, }, @@ -1133,7 +1180,9 @@ snapshot image, default is qcow2. Arguments: - "device": device name to snapshot (json-string) +- "node-name": graph node name to snapshot (json-string) - "snapshot-file": name of new image file (json-string) +- "snapshot-node-name": graph node name of the new snapshot (json-string) - "mode": whether and how QEMU should create the snapshot file (NewImageMode, optional, default "absolute-paths") - "format": format of new image (json-string, optional) @@ -1503,7 +1552,7 @@ EQMP { .name = "block_passwd", - .args_type = "device:B,password:s", + .args_type = "device:s?,node-name:s?,password:s", .mhandler.cmd_new = qmp_marshal_input_block_passwd, }, @@ -1516,6 +1565,7 @@ Set the password of encrypted block devices. Arguments: - "device": device name (json-string) +- "node-name": name in the block driver state graph (json-string) - "password": password (json-string) Example: @@ -3346,3 +3396,64 @@ Example (2): <- { "return": {} } EQMP + + { + .name = "query-named-block-nodes", + .args_type = "", + .mhandler.cmd_new = qmp_marshal_input_query_named_block_nodes, + }, + +SQMP +@query-named-block-nodes +------------------------ + +Return a list of BlockDeviceInfo for all the named block driver nodes + +Example: + +-> { "execute": "query-named-block-nodes" } +<- { "return": [ { "ro":false, + "drv":"qcow2", + "encrypted":false, + "file":"disks/test.qcow2", + "node-name": "my-node", + "backing_file_depth":1, + "bps":1000000, + "bps_rd":0, + "bps_wr":0, + "iops":1000000, + "iops_rd":0, + "iops_wr":0, + "bps_max": 8000000, + "bps_rd_max": 0, + "bps_wr_max": 0, + "iops_max": 0, + "iops_rd_max": 0, + "iops_wr_max": 0, + "iops_size": 0, + "image":{ + "filename":"disks/test.qcow2", + "format":"qcow2", + "virtual-size":2048000, + "backing_file":"base.qcow2", + "full-backing-filename":"disks/base.qcow2", + "backing-filename-format:"qcow2", + "snapshots":[ + { + "id": "1", + "name": "snapshot1", + "vm-state-size": 0, + "date-sec": 10000200, + "date-nsec": 12, + "vm-clock-sec": 206, + "vm-clock-nsec": 30 + } + ], + "backing-image":{ + "filename":"disks/base.qcow2", + "format":"qcow2", + "virtual-size":2048000 + } + } }Â ] } + +EQMP diff --git a/qobject/qdict.c b/qobject/qdict.c index 17e14f0..a3924f2 100644 --- a/qobject/qdict.c +++ b/qobject/qdict.c @@ -477,7 +477,43 @@ static void qdict_destroy_obj(QObject *obj) g_free(qdict); } -static void qdict_do_flatten(QDict *qdict, QDict *target, const char *prefix) +static void qdict_flatten_qdict(QDict *qdict, QDict *target, + const char *prefix); + +static void qdict_flatten_qlist(QList *qlist, QDict *target, const char *prefix) +{ + QObject *value; + const QListEntry *entry; + char *new_key; + int i; + + /* This function is never called with prefix == NULL, i.e., it is always + * called from within qdict_flatten_q(list|dict)(). Therefore, it does not + * need to remove list entries during the iteration (the whole list will be + * deleted eventually anyway from qdict_flatten_qdict()). */ + assert(prefix); + + entry = qlist_first(qlist); + + for (i = 0; entry; entry = qlist_next(entry), i++) { + value = qlist_entry_obj(entry); + new_key = g_strdup_printf("%s.%i", prefix, i); + + if (qobject_type(value) == QTYPE_QDICT) { + qdict_flatten_qdict(qobject_to_qdict(value), target, new_key); + } else if (qobject_type(value) == QTYPE_QLIST) { + qdict_flatten_qlist(qobject_to_qlist(value), target, new_key); + } else { + /* All other types are moved to the target unchanged. */ + qobject_incref(value); + qdict_put_obj(target, new_key, value); + } + + g_free(new_key); + } +} + +static void qdict_flatten_qdict(QDict *qdict, QDict *target, const char *prefix) { QObject *value; const QDictEntry *entry, *next; @@ -500,8 +536,12 @@ static void qdict_do_flatten(QDict *qdict, QDict *target, const char *prefix) if (qobject_type(value) == QTYPE_QDICT) { /* Entries of QDicts are processed recursively, the QDict object * itself disappears. */ - qdict_do_flatten(qobject_to_qdict(value), target, - new_key ? new_key : entry->key); + qdict_flatten_qdict(qobject_to_qdict(value), target, + new_key ? new_key : entry->key); + delete = true; + } else if (qobject_type(value) == QTYPE_QLIST) { + qdict_flatten_qlist(qobject_to_qlist(value), target, + new_key ? new_key : entry->key); delete = true; } else if (prefix) { /* All other objects are moved to the target unchanged. */ @@ -526,12 +566,14 @@ static void qdict_do_flatten(QDict *qdict, QDict *target, const char *prefix) /** * qdict_flatten(): For each nested QDict with key x, all fields with key y - * are moved to this QDict and their key is renamed to "x.y". This operation - * is applied recursively for nested QDicts. + * are moved to this QDict and their key is renamed to "x.y". For each nested + * QList with key x, the field at index y is moved to this QDict with the key + * "x.y" (i.e., the reverse of what qdict_array_split() does). + * This operation is applied recursively for nested QDicts and QLists. */ void qdict_flatten(QDict *qdict) { - qdict_do_flatten(qdict, qdict, NULL); + qdict_flatten_qdict(qdict, qdict, NULL); } /* extract all the src QDict entries starting by start into dst */ @@ -554,3 +596,40 @@ void qdict_extract_subqdict(QDict *src, QDict **dst, const char *start) entry = next; } } + +/** + * qdict_array_split(): This function moves array-like elements of a QDict into + * a new QList of QDicts. Every entry in the original QDict with a key prefixed + * "%u.", where %u designates an unsigned integer starting at 0 and + * incrementally counting up, will be moved to a new QDict at index %u in the + * output QList with the key prefix removed. The function terminates when there + * is no entry in the QDict with a prefix directly (incrementally) following the + * last one. + * Example: {"0.a": 42, "0.b": 23, "1.x": 0, "3.y": 1, "o.o": 7} + * (or {"1.x": 0, "3.y": 1, "0.a": 42, "o.o": 7, "0.b": 23}) + * => [{"a": 42, "b": 23}, {"x": 0}] + * and {"3.y": 1, "o.o": 7} (remainder of the old QDict) + */ +void qdict_array_split(QDict *src, QList **dst) +{ + unsigned i; + + *dst = qlist_new(); + + for (i = 0; i < UINT_MAX; i++) { + QDict *subqdict; + char prefix[32]; + size_t snprintf_ret; + + snprintf_ret = snprintf(prefix, 32, "%u.", i); + assert(snprintf_ret < 32); + + qdict_extract_subqdict(src, &subqdict, prefix); + if (!qdict_size(subqdict)) { + QDECREF(subqdict); + break; + } + + qlist_append_obj(*dst, QOBJECT(subqdict)); + } +} diff --git a/scripts/qapi.py b/scripts/qapi.py index 750e9fb..9b3de4c 100644 --- a/scripts/qapi.py +++ b/scripts/qapi.py @@ -247,7 +247,7 @@ def c_var(name, protect=True): 'and', 'and_eq', 'bitand', 'bitor', 'compl', 'not', 'not_eq', 'or', 'or_eq', 'xor', 'xor_eq']) # namespace pollution: - polluted_words = set(['unix']) + polluted_words = set(['unix', 'errno']) if protect and (name in c89_words | c99_words | c11_words | gcc_words | cpp_words | polluted_words): return "q_" + name return name.replace('-', '_').lstrip("*") diff --git a/tests/check-qdict.c b/tests/check-qdict.c index dc5f05a..7a7461b 100644 --- a/tests/check-qdict.c +++ b/tests/check-qdict.c @@ -227,6 +227,160 @@ static void qdict_iterapi_test(void) QDECREF(tests_dict); } +static void qdict_flatten_test(void) +{ + QList *list1 = qlist_new(); + QList *list2 = qlist_new(); + QDict *dict1 = qdict_new(); + QDict *dict2 = qdict_new(); + QDict *dict3 = qdict_new(); + + /* + * Test the flattening of + * + * { + * "e": [ + * 42, + * [ + * 23, + * 66, + * { + * "a": 0, + * "b": 1 + * } + * ] + * ], + * "f": { + * "c": 2, + * "d": 3, + * }, + * "g": 4 + * } + * + * to + * + * { + * "e.0": 42, + * "e.1.0": 23, + * "e.1.1": 66, + * "e.1.2.a": 0, + * "e.1.2.b": 1, + * "f.c": 2, + * "f.d": 3, + * "g": 4 + * } + */ + + qdict_put(dict1, "a", qint_from_int(0)); + qdict_put(dict1, "b", qint_from_int(1)); + + qlist_append_obj(list1, QOBJECT(qint_from_int(23))); + qlist_append_obj(list1, QOBJECT(qint_from_int(66))); + qlist_append_obj(list1, QOBJECT(dict1)); + qlist_append_obj(list2, QOBJECT(qint_from_int(42))); + qlist_append_obj(list2, QOBJECT(list1)); + + qdict_put(dict2, "c", qint_from_int(2)); + qdict_put(dict2, "d", qint_from_int(3)); + qdict_put_obj(dict3, "e", QOBJECT(list2)); + qdict_put_obj(dict3, "f", QOBJECT(dict2)); + qdict_put(dict3, "g", qint_from_int(4)); + + qdict_flatten(dict3); + + g_assert(qdict_get_int(dict3, "e.0") == 42); + g_assert(qdict_get_int(dict3, "e.1.0") == 23); + g_assert(qdict_get_int(dict3, "e.1.1") == 66); + g_assert(qdict_get_int(dict3, "e.1.2.a") == 0); + g_assert(qdict_get_int(dict3, "e.1.2.b") == 1); + g_assert(qdict_get_int(dict3, "f.c") == 2); + g_assert(qdict_get_int(dict3, "f.d") == 3); + g_assert(qdict_get_int(dict3, "g") == 4); + + g_assert(qdict_size(dict3) == 8); + + QDECREF(dict3); +} + +static void qdict_array_split_test(void) +{ + QDict *test_dict = qdict_new(); + QDict *dict1, *dict2; + QList *test_list; + + /* + * Test the split of + * + * { + * "1.x": 0, + * "3.y": 1, + * "0.a": 42, + * "o.o": 7, + * "0.b": 23 + * } + * + * to + * + * [ + * { + * "a": 42, + * "b": 23 + * }, + * { + * "x": 0 + * } + * ] + * + * and + * + * { + * "3.y": 1, + * "o.o": 7 + * } + * + * (remaining in the old QDict) + * + * This example is given in the comment of qdict_array_split(). + */ + + qdict_put(test_dict, "1.x", qint_from_int(0)); + qdict_put(test_dict, "3.y", qint_from_int(1)); + qdict_put(test_dict, "0.a", qint_from_int(42)); + qdict_put(test_dict, "o.o", qint_from_int(7)); + qdict_put(test_dict, "0.b", qint_from_int(23)); + + qdict_array_split(test_dict, &test_list); + + dict1 = qobject_to_qdict(qlist_pop(test_list)); + dict2 = qobject_to_qdict(qlist_pop(test_list)); + + g_assert(dict1); + g_assert(dict2); + g_assert(qlist_empty(test_list)); + + QDECREF(test_list); + + g_assert(qdict_get_int(dict1, "a") == 42); + g_assert(qdict_get_int(dict1, "b") == 23); + + g_assert(qdict_size(dict1) == 2); + + QDECREF(dict1); + + g_assert(qdict_get_int(dict2, "x") == 0); + + g_assert(qdict_size(dict2) == 1); + + QDECREF(dict2); + + g_assert(qdict_get_int(test_dict, "3.y") == 1); + g_assert(qdict_get_int(test_dict, "o.o") == 7); + + g_assert(qdict_size(test_dict) == 2); + + QDECREF(test_dict); +} + /* * Errors test-cases */ @@ -365,6 +519,8 @@ int main(int argc, char **argv) g_test_add_func("/public/del", qdict_del_test); g_test_add_func("/public/to_qdict", qobject_to_qdict_test); g_test_add_func("/public/iterapi", qdict_iterapi_test); + g_test_add_func("/public/flatten", qdict_flatten_test); + g_test_add_func("/public/array_split", qdict_array_split_test); g_test_add_func("/errors/put_exists", qdict_put_exists_test); g_test_add_func("/errors/get_not_exists", qdict_get_not_exists_test); diff --git a/tests/fdc-test.c b/tests/fdc-test.c index 38b5b17..37096dc 100644 --- a/tests/fdc-test.c +++ b/tests/fdc-test.c @@ -518,7 +518,6 @@ static void fuzz_registers(void) int main(int argc, char **argv) { const char *arch = qtest_get_arch(); - char *cmdline; int fd; int ret; @@ -538,9 +537,7 @@ int main(int argc, char **argv) /* Run the tests */ g_test_init(&argc, &argv, NULL); - cmdline = g_strdup_printf("-vnc none "); - - qtest_start(cmdline); + qtest_start(NULL); qtest_irq_intercept_in(global_qtest, "ioapic"); qtest_add_func("/fdc/cmos", test_cmos); qtest_add_func("/fdc/no_media_on_start", test_no_media_on_start); diff --git a/tests/ide-test.c b/tests/ide-test.c index d5cec5a..4a0d97f 100644 --- a/tests/ide-test.c +++ b/tests/ide-test.c @@ -380,7 +380,6 @@ static void test_bmdma_no_busmaster(void) static void test_bmdma_setup(void) { ide_test_start( - "-vnc none " "-drive file=%s,if=ide,serial=%s,cache=writeback " "-global ide-hd.ver=%s", tmp_path, "testdisk", "version"); @@ -410,7 +409,6 @@ static void test_identify(void) int ret; ide_test_start( - "-vnc none " "-drive file=%s,if=ide,serial=%s,cache=writeback " "-global ide-hd.ver=%s", tmp_path, "testdisk", "version"); @@ -455,7 +453,6 @@ static void test_flush(void) uint8_t data; ide_test_start( - "-vnc none " "-drive file=blkdebug::%s,if=ide,cache=writeback", tmp_path); diff --git a/tests/qemu-iotests/017 b/tests/qemu-iotests/017 index aba3faf..3af3cdf 100755 --- a/tests/qemu-iotests/017 +++ b/tests/qemu-iotests/017 @@ -43,6 +43,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt qcow qcow2 vmdk qed _supported_proto generic _supported_os Linux +_unsupported_imgopts "subformat=monolithicFlat" "subformat=twoGbMaxExtentFlat" TEST_OFFSETS="0 4294967296" diff --git a/tests/qemu-iotests/018 b/tests/qemu-iotests/018 index 15fcfe5..6f7f054 100755 --- a/tests/qemu-iotests/018 +++ b/tests/qemu-iotests/018 @@ -43,6 +43,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt qcow qcow2 vmdk qed _supported_proto generic _supported_os Linux +_unsupported_imgopts "subformat=monolithicFlat" "subformat=twoGbMaxExtentFlat" TEST_OFFSETS="0 4294967296" diff --git a/tests/qemu-iotests/019 b/tests/qemu-iotests/019 index 5bb18d0..b43e70f 100755 --- a/tests/qemu-iotests/019 +++ b/tests/qemu-iotests/019 @@ -47,6 +47,9 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt qcow qcow2 vmdk qed _supported_proto generic _supported_os Linux +_unsupported_imgopts "subformat=monolithicFlat" \ + "subformat=twoGbMaxExtentFlat" \ + "subformat=twoGbMaxExtentSparse" TEST_OFFSETS="0 4294967296" CLUSTER_SIZE=65536 diff --git a/tests/qemu-iotests/020 b/tests/qemu-iotests/020 index b3c86d8..73a0429 100755 --- a/tests/qemu-iotests/020 +++ b/tests/qemu-iotests/020 @@ -45,6 +45,9 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt qcow qcow2 vmdk qed _supported_proto generic _supported_os Linux +_unsupported_imgopts "subformat=monolithicFlat" \ + "subformat=twoGbMaxExtentFlat" \ + "subformat=twoGbMaxExtentSparse" TEST_OFFSETS="0 4294967296" diff --git a/tests/qemu-iotests/034 b/tests/qemu-iotests/034 index 67f1959..7349789 100755 --- a/tests/qemu-iotests/034 +++ b/tests/qemu-iotests/034 @@ -41,6 +41,9 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt qcow qcow2 vmdk qed _supported_proto generic _supported_os Linux +_unsupported_imgopts "subformat=monolithicFlat" \ + "subformat=twoGbMaxExtentFlat" \ + "subformat=twoGbMaxExtentSparse" CLUSTER_SIZE=4k size=128M diff --git a/tests/qemu-iotests/037 b/tests/qemu-iotests/037 index 743bae3..e444349 100755 --- a/tests/qemu-iotests/037 +++ b/tests/qemu-iotests/037 @@ -41,6 +41,9 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt qcow qcow2 vmdk qed _supported_proto generic _supported_os Linux +_unsupported_imgopts "subformat=monolithicFlat" \ + "subformat=twoGbMaxExtentFlat" \ + "subformat=twoGbMaxExtentSparse" CLUSTER_SIZE=4k size=128M diff --git a/tests/qemu-iotests/051.out b/tests/qemu-iotests/051.out index c2cadba..d0c5173 100644 --- a/tests/qemu-iotests/051.out +++ b/tests/qemu-iotests/051.out @@ -222,7 +222,7 @@ QEMU X.Y.Z monitor - type 'help' for more information (qemu) q[K[Dqu[K[D[Dqui[K[D[D[Dquit[K Testing: -drive file=TEST_DIR/t.qcow2,file.driver=qcow2 -QEMU_PROG: -drive file=TEST_DIR/t.qcow2,file.driver=qcow2: could not open disk image TEST_DIR/t.qcow2: Can't use 'qcow2' as a block driver for the protocol level +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,file.driver=qcow2: could not open disk image TEST_DIR/t.qcow2: Block format 'qcow2' used by device '' doesn't support the option 'filename' === Parsing protocol from file name === diff --git a/tests/qemu-iotests/059 b/tests/qemu-iotests/059 index 65bea1d..2d604d3 100755 --- a/tests/qemu-iotests/059 +++ b/tests/qemu-iotests/059 @@ -42,6 +42,9 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt vmdk _supported_proto generic _supported_os Linux +_unsupported_imgopts "subformat=monolithicFlat" \ + "subformat=twoGbMaxExtentFlat" \ + "subformat=twoGbMaxExtentSparse" capacity_offset=16 granularity_offset=20 @@ -95,10 +98,23 @@ EOF _img_info echo +echo "=== Testing truncated sparse ===" +IMGOPTS="subformat=monolithicSparse" _make_test_img 100G +truncate -s 10M $TEST_IMG +_img_info + +echo echo "=== Testing version 3 ===" _use_sample_img iotest-version3.vmdk.bz2 _img_info +echo +echo "=== Testing 4TB monolithicFlat creation and IO ===" +IMGOPTS="subformat=monolithicFlat" _make_test_img 4T +_img_info +$QEMU_IO -c "write -P 0xa 900G 512" "$TEST_IMG" | _filter_qemu_io +$QEMU_IO -c "read -v 900G 1024" "$TEST_IMG" | _filter_qemu_io + # success, all done echo "*** done" rm -f $seq.full diff --git a/tests/qemu-iotests/059.out b/tests/qemu-iotests/059.out index 16ab7c6..4ffeb54 100644 --- a/tests/qemu-iotests/059.out +++ b/tests/qemu-iotests/059.out @@ -2043,8 +2043,87 @@ qemu-img: Could not open 'TEST_DIR/t.IMGFMT': Invalid extent lines: RW 12582912 VMFS "dummy.IMGFMT" 1 +=== Testing truncated sparse === +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=107374182400 +qemu-img: File truncated, expecting at least 13172736 bytes +qemu-img: Could not open 'TEST_DIR/t.IMGFMT': Could not open 'TEST_DIR/t.IMGFMT': Wrong medium type + === Testing version 3 === image: TEST_DIR/iotest-version3.IMGFMT file format: IMGFMT virtual size: 1.0G (1073741824 bytes) + +=== Testing 4TB monolithicFlat creation and IO === +Formatting 'TEST_DIR/iotest-version3.IMGFMT', fmt=IMGFMT size=4398046511104 +image: TEST_DIR/iotest-version3.IMGFMT +file format: IMGFMT +virtual size: 4.0T (4398046511104 bytes) +wrote 512/512 bytes at offset 966367641600 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +e100000000: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e100000010: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e100000020: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e100000030: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e100000040: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e100000050: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e100000060: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e100000070: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e100000080: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e100000090: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e1000000a0: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e1000000b0: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e1000000c0: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e1000000d0: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e1000000e0: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e1000000f0: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e100000100: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e100000110: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e100000120: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e100000130: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e100000140: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e100000150: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e100000160: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e100000170: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e100000180: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e100000190: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e1000001a0: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e1000001b0: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e1000001c0: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e1000001d0: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e1000001e0: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e1000001f0: 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a 0a ................ +e100000200: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e100000210: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e100000220: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e100000230: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e100000240: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e100000250: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e100000260: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e100000270: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e100000280: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e100000290: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e1000002a0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e1000002b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e1000002c0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e1000002d0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e1000002e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e1000002f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e100000300: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e100000310: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e100000320: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e100000330: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e100000340: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e100000350: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e100000360: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e100000370: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e100000380: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e100000390: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e1000003a0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e1000003b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e1000003c0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e1000003d0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e1000003e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +e1000003f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +read 1024/1024 bytes at offset 966367641600 +1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) *** done diff --git a/tests/qemu-iotests/063 b/tests/qemu-iotests/063 index 2ab8f20..77503a2 100755 --- a/tests/qemu-iotests/063 +++ b/tests/qemu-iotests/063 @@ -44,6 +44,9 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt qcow qcow2 vmdk qed raw _supported_proto generic _supported_os Linux +_unsupported_imgopts "subformat=monolithicFlat" \ + "subformat=twoGbMaxExtentFlat" \ + "subformat=twoGbMaxExtentSparse" _make_test_img 4M diff --git a/tests/qemu-iotests/069 b/tests/qemu-iotests/069 index 3042803..50347d9 100755 --- a/tests/qemu-iotests/069 +++ b/tests/qemu-iotests/069 @@ -41,6 +41,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 _supported_fmt cow qed qcow qcow2 vmdk _supported_proto generic _supported_os Linux +_unsupported_imgopts "subformat=monolithicFlat" "subformat=twoGbMaxExtentFlat" IMG_SIZE=128K diff --git a/tests/qemu-iotests/071 b/tests/qemu-iotests/071 new file mode 100755 index 0000000..2a22546 --- /dev/null +++ b/tests/qemu-iotests/071 @@ -0,0 +1,239 @@ +#!/bin/bash +# +# Test case for the QMP blkdebug and blkverify interfaces +# +# Copyright (C) 2013 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +# creator +owner=mreitz@redhat.com + +seq="$(basename $0)" +echo "QA output created by $seq" + +here="$PWD" +tmp=/tmp/$$ +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +_supported_fmt generic +_supported_proto generic +_supported_os Linux + +function do_run_qemu() +{ + echo Testing: "$@" | _filter_imgfmt + $QEMU -nographic -qmp stdio -serial none "$@" + echo +} + +function run_qemu() +{ + do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qmp | _filter_qemu_io +} + +IMG_SIZE=64M + +echo +echo "=== Testing blkverify through filename ===" +echo + +TEST_IMG="$TEST_IMG.base" IMGOPTS="" IMGFMT="raw" _make_test_img $IMG_SIZE |\ + _filter_imgfmt +_make_test_img $IMG_SIZE +$QEMU_IO -c "open -o file.driver=blkverify,file.raw.filename=$TEST_IMG.base $TEST_IMG" \ + -c 'read 0 512' -c 'write -P 42 0x38000 512' -c 'read -P 42 0x38000 512' | _filter_qemu_io + +$QEMU_IO -c 'write -P 42 0 512' "$TEST_IMG" | _filter_qemu_io + +$QEMU_IO -c "open -o file.driver=blkverify,file.raw.filename=$TEST_IMG.base $TEST_IMG" \ + -c 'read -P 42 0 512' | _filter_qemu_io + +echo +echo "=== Testing blkverify through file blockref ===" +echo + +TEST_IMG="$TEST_IMG.base" IMGOPTS="" IMGFMT="raw" _make_test_img $IMG_SIZE |\ + _filter_imgfmt +_make_test_img $IMG_SIZE +$QEMU_IO -c "open -o file.driver=blkverify,file.raw.filename=$TEST_IMG.base,file.test.driver=$IMGFMT,file.test.file.filename=$TEST_IMG" \ + -c 'read 0 512' -c 'write -P 42 0x38000 512' -c 'read -P 42 0x38000 512' | _filter_qemu_io + +$QEMU_IO -c 'write -P 42 0 512' "$TEST_IMG" | _filter_qemu_io + +$QEMU_IO -c "open -o file.driver=blkverify,file.raw.filename=$TEST_IMG.base $TEST_IMG" \ + -c 'read -P 42 0 512' | _filter_qemu_io + +echo +echo "=== Testing blkdebug through filename ===" +echo + +$QEMU_IO -c "open -o file.driver=blkdebug,file.inject-error.event=l2_load $TEST_IMG" \ + -c 'read -P 42 0x38000 512' + +echo +echo "=== Testing blkdebug through file blockref ===" +echo + +$QEMU_IO -c "open -o driver=$IMGFMT,file.driver=blkdebug,file.inject-error.event=l2_load,file.image.filename=$TEST_IMG" \ + -c 'read -P 42 0x38000 512' + +echo +echo "=== Testing blkdebug on existing block device ===" +echo + +run_qemu -drive "file=$TEST_IMG,format=raw,if=none,id=drive0" <<EOF +{ "execute": "qmp_capabilities" } +{ "execute": "blockdev-add", + "arguments": { + "options": { + "driver": "$IMGFMT", + "id": "drive0-debug", + "file": { + "driver": "blkdebug", + "image": "drive0", + "inject-error": [{ + "event": "l2_load" + }] + } + } + } +} +{ "execute": "human-monitor-command", + "arguments": { + "command-line": 'qemu-io drive0-debug "read 0 512"' + } +} +{ "execute": "quit" } +EOF + +echo +echo "=== Testing blkverify on existing block device ===" +echo + +run_qemu -drive "file=$TEST_IMG,format=$IMGFMT,if=none,id=drive0" <<EOF +{ "execute": "qmp_capabilities" } +{ "execute": "blockdev-add", + "arguments": { + "options": { + "driver": "blkverify", + "id": "drive0-verify", + "test": "drive0", + "raw": { + "driver": "raw", + "file": { + "driver": "file", + "filename": "$TEST_IMG.base" + } + } + } + } +} +{ "execute": "human-monitor-command", + "arguments": { + "command-line": 'qemu-io drive0-verify "read 0 512"' + } +} +{ "execute": "quit" } +EOF + +echo +echo "=== Testing blkverify on existing raw block device ===" +echo + +run_qemu -drive "file=$TEST_IMG.base,if=none,id=drive0" <<EOF +{ "execute": "qmp_capabilities" } +{ "execute": "blockdev-add", + "arguments": { + "options": { + "driver": "blkverify", + "id": "drive0-verify", + "test": { + "driver": "$IMGFMT", + "file": { + "driver": "file", + "filename": "$TEST_IMG" + } + }, + "raw": "drive0" + } + } +} +{ "execute": "human-monitor-command", + "arguments": { + "command-line": 'qemu-io drive0-verify "read 0 512"' + } +} +{ "execute": "quit" } +EOF + +echo +echo "=== Testing blkdebug's set-state through QMP ===" +echo + +run_qemu -drive "file=$TEST_IMG,format=raw,if=none,id=drive0" <<EOF +{ "execute": "qmp_capabilities" } +{ "execute": "blockdev-add", + "arguments": { + "options": { + "driver": "$IMGFMT", + "id": "drive0-debug", + "file": { + "driver": "blkdebug", + "image": "drive0", + "inject-error": [{ + "event": "read_aio", + "state": 42 + }], + "set-state": [{ + "event": "write_aio", + "new_state": 42 + }] + } + } + } +} +{ "execute": "human-monitor-command", + "arguments": { + "command-line": 'qemu-io drive0-debug "read 0 512"' + } +} +{ "execute": "human-monitor-command", + "arguments": { + "command-line": 'qemu-io drive0-debug "write 0 512"' + } +} +{ "execute": "human-monitor-command", + "arguments": { + "command-line": 'qemu-io drive0-debug "read 0 512"' + } +} +{ "execute": "quit" } +EOF + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/071.out b/tests/qemu-iotests/071.out new file mode 100644 index 0000000..5f840a9 --- /dev/null +++ b/tests/qemu-iotests/071.out @@ -0,0 +1,90 @@ +QA output created by 071 + +=== Testing blkverify through filename === + +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +read 512/512 bytes at offset 0 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 512/512 bytes at offset 229376 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 229376 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 512/512 bytes at offset 0 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +blkverify: read sector_num=0 nb_sectors=4 contents mismatch in sector 0 + +=== Testing blkverify through file blockref === + +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +read 512/512 bytes at offset 0 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 512/512 bytes at offset 229376 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 229376 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 512/512 bytes at offset 0 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +blkverify: read sector_num=0 nb_sectors=4 contents mismatch in sector 0 + +=== Testing blkdebug through filename === + +read failed: Input/output error + +=== Testing blkdebug through file blockref === + +read failed: Input/output error + +=== Testing blkdebug on existing block device === + +Testing: -drive file=TEST_DIR/t.IMGFMT,format=raw,if=none,id=drive0 +QMP_VERSION +{"return": {}} +{"return": {}} +read failed: Input/output error +{"return": ""} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "ide1-cd0", "tray-open": true}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "floppy0", "tray-open": true}} + + +=== Testing blkverify on existing block device === + +Testing: -drive file=TEST_DIR/t.IMGFMT,format=IMGFMT,if=none,id=drive0 +QMP_VERSION +{"return": {}} +{"return": {}} +blkverify: read sector_num=0 nb_sectors=1 contents mismatch in sector 0 + + +=== Testing blkverify on existing raw block device === + +Testing: -drive file=TEST_DIR/t.IMGFMT.base,if=none,id=drive0 +QMP_VERSION +{"return": {}} +{"return": {}} +blkverify: read sector_num=0 nb_sectors=1 contents mismatch in sector 0 + + +=== Testing blkdebug's set-state through QMP === + +Testing: -drive file=TEST_DIR/t.IMGFMT,format=raw,if=none,id=drive0 +QMP_VERSION +{"return": {}} +{"return": {}} +read 512/512 bytes at offset 0 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +{"return": ""} +wrote 512/512 bytes at offset 0 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +{"return": ""} +read failed: Input/output error +{"return": ""} +{"return": {}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "ide1-cd0", "tray-open": true}} +{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "DEVICE_TRAY_MOVED", "data": {"device": "floppy0", "tray-open": true}} + +*** done diff --git a/tests/qemu-iotests/072 b/tests/qemu-iotests/072 new file mode 100755 index 0000000..a3876c2 --- /dev/null +++ b/tests/qemu-iotests/072 @@ -0,0 +1,69 @@ +#!/bin/bash +# +# Test case for nested image formats +# +# Copyright (C) 2013 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +# creator +owner=mreitz@redhat.com + +seq="$(basename $0)" +echo "QA output created by $seq" + +here="$PWD" +tmp=/tmp/$$ +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +_supported_fmt vpc vmdk vhdx vdi qed qcow2 qcow cow +_supported_proto generic +_supported_os Linux + +IMG_SIZE=64M + +echo +echo "=== Testing nested image formats ===" +echo + +TEST_IMG="$TEST_IMG.base" _make_test_img $IMG_SIZE + +$QEMU_IO -c 'write -P 42 0 512' -c 'write -P 23 512 512' \ + -c 'write -P 66 1024 512' "$TEST_IMG.base" | _filter_qemu_io + +$QEMU_IMG convert -f raw -O $IMGFMT "$TEST_IMG.base" "$TEST_IMG" + +$QEMU_IO -c "open -o driver=$IMGFMT,file.driver=$IMGFMT,file.file.filename=$TEST_IMG" \ + -c 'read -P 42 0 512' -c 'read -P 23 512 512' \ + -c 'read -P 66 1024 512' | _filter_qemu_io + +# When not giving any format, qemu should open only one "layer". Therefore, this +# should not work for any image formats with a header. +$QEMU_IO -c 'read -P 42 0 512' "$TEST_IMG" | _filter_qemu_io + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/072.out b/tests/qemu-iotests/072.out new file mode 100644 index 0000000..efe577c --- /dev/null +++ b/tests/qemu-iotests/072.out @@ -0,0 +1,21 @@ +QA output created by 072 + +=== Testing nested image formats === + +Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864 +wrote 512/512 bytes at offset 0 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 512/512 bytes at offset 512 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 512/512 bytes at offset 1024 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 0 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 512 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 1024 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Pattern verification failed at offset 0, 512 bytes +read 512/512 bytes at offset 0 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +*** done diff --git a/tests/qemu-iotests/077 b/tests/qemu-iotests/077 new file mode 100755 index 0000000..bbf7b51 --- /dev/null +++ b/tests/qemu-iotests/077 @@ -0,0 +1,278 @@ +#!/bin/bash +# +# Test concurrent pread/pwrite +# +# Copyright (C) 2014 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# + +# creator +owner=kwolf@redhat.com + +seq=`basename $0` +echo "QA output created by $seq" + +here=`pwd` +tmp=/tmp/$$ +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +_supported_fmt generic +_supported_proto generic +_supported_os Linux + +CLUSTER_SIZE=4k +size=128M + +_make_test_img $size + +echo +echo "== Some concurrent requests involving RMW ==" + +function test_io() +{ +echo "open -o file.align=4k blkdebug::$TEST_IMG" +# A simple RMW request +cat <<EOF +aio_write -P 10 0x200 0x200 +aio_flush +EOF + +# Sequential RMW requests on the same physical sector +off=0x1000 +for ev in "head" "after_head" "tail" "after_tail"; do +cat <<EOF +break pwritev_rmw.$ev A +aio_write -P 10 $((off + 0x200)) 0x200 +wait_break A +aio_write -P 11 $((off + 0x400)) 0x200 +sleep 100 +resume A +aio_flush +EOF +off=$((off + 0x1000)) +done + +# Chained dependencies +cat <<EOF +break pwritev_rmw.after_tail A +aio_write -P 10 0x5000 0x200 +wait_break A +aio_write -P 11 0x5200 0x200 +aio_write -P 12 0x5400 0x200 +aio_write -P 13 0x5600 0x200 +aio_write -P 14 0x5800 0x200 +aio_write -P 15 0x5a00 0x200 +aio_write -P 16 0x5c00 0x200 +aio_write -P 17 0x5e00 0x200 +sleep 100 +resume A +aio_flush +EOF + +# Overlapping multiple requests +cat <<EOF +break pwritev_rmw.after_tail A +aio_write -P 10 0x6000 0x200 +wait_break A +break pwritev_rmw.after_head B +aio_write -P 10 0x7e00 0x200 +wait_break B +aio_write -P 11 0x6800 0x1000 +resume A +sleep 100 +resume B +aio_flush +EOF + +cat <<EOF +break pwritev_rmw.after_tail A +aio_write -P 10 0x8000 0x200 +wait_break A +break pwritev_rmw.after_head B +aio_write -P 10 0x9e00 0x200 +wait_break B +aio_write -P 11 0x8800 0x1000 +resume B +sleep 100 +resume A +aio_flush +EOF + +cat <<EOF +break pwritev_rmw.after_tail A +aio_write -P 10 0xa000 0x200 +wait_break A +aio_write -P 11 0xa800 0x1000 +break pwritev_rmw.after_head B +aio_write -P 10 0xbe00 0x200 +wait_break B +resume A +sleep 100 +resume B +aio_flush +EOF + +cat <<EOF +break pwritev_rmw.after_tail A +aio_write -P 10 0xc000 0x200 +wait_break A +aio_write -P 11 0xc800 0x1000 +break pwritev_rmw.after_head B +aio_write -P 10 0xde00 0x200 +wait_break B +resume B +sleep 100 +resume A +aio_flush +EOF + +# Only RMW for the tail part +cat <<EOF +break pwritev_rmw.after_tail A +aio_write -P 10 0xe000 0x1800 +wait_break A +aio_write -P 11 0xf000 0xc00 +sleep 100 +resume A +aio_flush +EOF + +cat <<EOF +break pwritev A +aio_write -P 10 0x10000 0x800 +wait_break A +break pwritev_rmw.after_tail B +aio_write -P 11 0x10000 0x400 +break pwritev_done C +resume A +wait_break C +resume C +sleep 100 +wait_break B +resume B +aio_flush +EOF + +cat <<EOF +break pwritev A +aio_write -P 10 0x11000 0x800 +wait_break A +aio_write -P 11 0x11000 0x1000 +sleep 100 +resume A +aio_flush +EOF +} + +test_io | $QEMU_IO | _filter_qemu_io | \ + sed -e 's,[0-9/]* bytes at offset [0-9]*,XXX/XXX bytes at offset XXX,g' \ + -e 's/^[0-9]* \(bytes\|KiB\)/XXX bytes/' \ + -e '/Suspended/d' + +echo +echo "== Verify image content ==" + +function verify_io() +{ + # A simple RMW request + echo read -P 0 0 0x200 + echo read -P 10 0x200 0x200 + echo read -P 0 0x400 0xc00 + + # Sequential RMW requests on the same physical sector + echo read -P 0 0x1000 0x200 + echo read -P 10 0x1200 0x200 + echo read -P 11 0x1400 0x200 + echo read -P 0 0x1600 0xa00 + + echo read -P 0 0x2000 0x200 + echo read -P 10 0x2200 0x200 + echo read -P 11 0x2400 0x200 + echo read -P 0 0x2600 0xa00 + + echo read -P 0 0x3000 0x200 + echo read -P 10 0x3200 0x200 + echo read -P 11 0x3400 0x200 + echo read -P 0 0x3600 0xa00 + + echo read -P 0 0x4000 0x200 + echo read -P 10 0x4200 0x200 + echo read -P 11 0x4400 0x200 + echo read -P 0 0x4600 0xa00 + + # Chained dependencies + echo read -P 10 0x5000 0x200 + echo read -P 11 0x5200 0x200 + echo read -P 12 0x5400 0x200 + echo read -P 13 0x5600 0x200 + echo read -P 14 0x5800 0x200 + echo read -P 15 0x5a00 0x200 + echo read -P 16 0x5c00 0x200 + echo read -P 17 0x5e00 0x200 + + # Overlapping multiple requests + echo read -P 10 0x6000 0x200 + echo read -P 0 0x6200 0x600 + echo read -P 11 0x6800 0x1000 + echo read -P 0 0x7800 0x600 + echo read -P 10 0x7e00 0x200 + + echo read -P 10 0x8000 0x200 + echo read -P 0 0x8200 0x600 + echo read -P 11 0x8800 0x1000 + echo read -P 0 0x9800 0x600 + echo read -P 10 0x9e00 0x200 + + echo read -P 10 0xa000 0x200 + echo read -P 0 0xa200 0x600 + echo read -P 11 0xa800 0x1000 + echo read -P 0 0xb800 0x600 + echo read -P 10 0xbe00 0x200 + + echo read -P 10 0xc000 0x200 + echo read -P 0 0xc200 0x600 + echo read -P 11 0xc800 0x1000 + echo read -P 0 0xd800 0x600 + echo read -P 10 0xde00 0x200 + + # Only RMW for the tail part + echo read -P 10 0xe000 0x1000 + echo read -P 11 0xf800 0x400 + echo read -P 0 0xfc00 0x400 + + echo read -P 11 0x10000 0x400 + echo read -P 10 0x10400 0x400 + + echo read -P 11 0x11800 0x800 +} + +verify_io | $QEMU_IO "$TEST_IMG" | _filter_qemu_io + +_check_test_img + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/077.out b/tests/qemu-iotests/077.out new file mode 100644 index 0000000..ab61234 --- /dev/null +++ b/tests/qemu-iotests/077.out @@ -0,0 +1,202 @@ +QA output created by 077 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 + +== Some concurrent requests involving RMW == +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +blkdebug: Resuming request 'A' +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +blkdebug: Resuming request 'A' +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +blkdebug: Resuming request 'A' +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +blkdebug: Resuming request 'A' +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +blkdebug: Resuming request 'A' +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +blkdebug: Resuming request 'A' +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +blkdebug: Resuming request 'B' +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +blkdebug: Resuming request 'B' +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +blkdebug: Resuming request 'A' +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +blkdebug: Resuming request 'A' +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +blkdebug: Resuming request 'B' +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +blkdebug: Resuming request 'B' +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +blkdebug: Resuming request 'A' +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +blkdebug: Resuming request 'A' +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +blkdebug: Resuming request 'A' +blkdebug: Resuming request 'C' +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +blkdebug: Resuming request 'B' +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +blkdebug: Resuming request 'A' +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote XXX/XXX bytes at offset XXX +XXX bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== Verify image content == +read 512/512 bytes at offset 0 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 512 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 3072/3072 bytes at offset 1024 +3 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 4096 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 4608 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 5120 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 2560/2560 bytes at offset 5632 +2.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 8192 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 8704 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 9216 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 2560/2560 bytes at offset 9728 +2.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 12288 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 12800 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 13312 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 2560/2560 bytes at offset 13824 +2.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 16384 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 16896 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 17408 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 2560/2560 bytes at offset 17920 +2.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 20480 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 20992 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 21504 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 22016 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 22528 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 23040 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 23552 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 24064 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 24576 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 1536/1536 bytes at offset 25088 +1.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 4096/4096 bytes at offset 26624 +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 1536/1536 bytes at offset 30720 +1.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 32256 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 32768 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 1536/1536 bytes at offset 33280 +1.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 4096/4096 bytes at offset 34816 +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 1536/1536 bytes at offset 38912 +1.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 40448 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 40960 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 1536/1536 bytes at offset 41472 +1.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 4096/4096 bytes at offset 43008 +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 1536/1536 bytes at offset 47104 +1.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 48640 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 49152 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 1536/1536 bytes at offset 49664 +1.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 4096/4096 bytes at offset 51200 +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 1536/1536 bytes at offset 55296 +1.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 56832 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 4096/4096 bytes at offset 57344 +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 1024/1024 bytes at offset 63488 +1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 1024/1024 bytes at offset 64512 +1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 1024/1024 bytes at offset 65536 +1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 1024/1024 bytes at offset 66560 +1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 2048/2048 bytes at offset 71680 +2 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +No errors were found on the image. +*** done diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index 28ba0d9..0f68156 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -170,6 +170,17 @@ _make_test_img() fi } +_rm_test_img() +{ + local img=$1 + if [ "$IMGFMT" = "vmdk" ]; then + # Remove all the extents for vmdk + $QEMU_IMG info $img 2>/dev/null | grep 'filename:' | cut -f 2 -d: \ + | xargs -I {} rm -f "{}" + fi + rm -f $img +} + _cleanup_test_img() { case "$IMGPROTO" in @@ -179,9 +190,9 @@ _cleanup_test_img() rm -f "$TEST_IMG_FILE" ;; file) - rm -f "$TEST_DIR/t.$IMGFMT" - rm -f "$TEST_DIR/t.$IMGFMT.orig" - rm -f "$TEST_DIR/t.$IMGFMT.base" + _rm_test_img "$TEST_DIR/t.$IMGFMT" + _rm_test_img "$TEST_DIR/t.$IMGFMT.orig" + _rm_test_img "$TEST_DIR/t.$IMGFMT.base" if [ -n "$SAMPLE_IMG_FILE" ] then rm -f "$TEST_DIR/$SAMPLE_IMG_FILE" @@ -406,6 +417,17 @@ _default_cache_mode() fi } +_unsupported_imgopts() +{ + for bad_opt + do + if echo "$IMGOPTS" | grep -q 2>/dev/null "$bad_opt" + then + _notrun "not suitable for image option: $bad_opt" + fi + done +} + # this test requires that a specified command (executable) exists # _require_command() diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index cc750c9..03c762f 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -77,5 +77,8 @@ 068 rw auto 069 rw auto 070 rw auto +071 rw auto +072 rw auto 073 rw auto 074 rw auto +077 rw auto diff --git a/util/Makefile.objs b/util/Makefile.objs index af3e5cb..937376b 100644 --- a/util/Makefile.objs +++ b/util/Makefile.objs @@ -13,3 +13,4 @@ util-obj-y += hexdump.o util-obj-y += crc32c.o util-obj-y += throttle.o util-obj-y += getauxval.o +util-obj-y += readline.o diff --git a/util/oslib-posix.c b/util/oslib-posix.c index e00a44c..d5dca47 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -47,6 +47,9 @@ extern int daemon(int, int); # define QEMU_VMALLOC_ALIGN getpagesize() #endif +#include <termios.h> +#include <unistd.h> + #include <glib/gprintf.h> #include "config-host.h" @@ -85,6 +88,11 @@ void *qemu_oom_check(void *ptr) void *qemu_memalign(size_t alignment, size_t size) { void *ptr; + + if (alignment < sizeof(void*)) { + alignment = sizeof(void*); + } + #if defined(_POSIX_C_SOURCE) && !defined(__sun__) int ret; ret = posix_memalign(&ptr, alignment, size); @@ -251,3 +259,18 @@ qemu_get_local_state_pathname(const char *relative_pathname) return g_strdup_printf("%s/%s", CONFIG_QEMU_LOCALSTATEDIR, relative_pathname); } + +void qemu_set_tty_echo(int fd, bool echo) +{ + struct termios tty; + + tcgetattr(fd, &tty); + + if (echo) { + tty.c_lflag |= ECHO | ECHONL | ICANON | IEXTEN; + } else { + tty.c_lflag &= ~(ECHO | ECHONL | ICANON | IEXTEN); + } + + tcsetattr(fd, TCSANOW, &tty); +} diff --git a/util/oslib-win32.c b/util/oslib-win32.c index 776ccfa..50be044 100644 --- a/util/oslib-win32.c +++ b/util/oslib-win32.c @@ -189,3 +189,22 @@ qemu_get_local_state_pathname(const char *relative_pathname) return g_strdup_printf("%s" G_DIR_SEPARATOR_S "%s", base_path, relative_pathname); } + +void qemu_set_tty_echo(int fd, bool echo) +{ + HANDLE handle = (HANDLE)_get_osfhandle(fd); + DWORD dwMode = 0; + + if (handle == INVALID_HANDLE_VALUE) { + return; + } + + GetConsoleMode(handle, &dwMode); + + if (echo) { + SetConsoleMode(handle, dwMode | ENABLE_ECHO_INPUT | ENABLE_LINE_INPUT); + } else { + SetConsoleMode(handle, + dwMode & ~(ENABLE_ECHO_INPUT | ENABLE_LINE_INPUT)); + } +} diff --git a/util/qemu-config.c b/util/qemu-config.c index 7973659..9298f55 100644 --- a/util/qemu-config.c +++ b/util/qemu-config.c @@ -356,3 +356,103 @@ int qemu_read_config_file(const char *filename) return -EINVAL; } } + +static void config_parse_qdict_section(QDict *options, QemuOptsList *opts, + Error **errp) +{ + QemuOpts *subopts; + QDict *subqdict; + QList *list = NULL; + Error *local_err = NULL; + size_t orig_size, enum_size; + char *prefix; + + prefix = g_strdup_printf("%s.", opts->name); + qdict_extract_subqdict(options, &subqdict, prefix); + g_free(prefix); + orig_size = qdict_size(subqdict); + if (!orig_size) { + goto out; + } + + subopts = qemu_opts_create(opts, NULL, 0, &local_err); + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); + goto out; + } + + qemu_opts_absorb_qdict(subopts, subqdict, &local_err); + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); + goto out; + } + + enum_size = qdict_size(subqdict); + if (enum_size < orig_size && enum_size) { + error_setg(errp, "Unknown option '%s' for [%s]", + qdict_first(subqdict)->key, opts->name); + goto out; + } + + if (enum_size) { + /* Multiple, enumerated sections */ + QListEntry *list_entry; + unsigned i = 0; + + /* Not required anymore */ + qemu_opts_del(subopts); + + qdict_array_split(subqdict, &list); + if (qdict_size(subqdict)) { + error_setg(errp, "Unused option '%s' for [%s]", + qdict_first(subqdict)->key, opts->name); + goto out; + } + + QLIST_FOREACH_ENTRY(list, list_entry) { + QDict *section = qobject_to_qdict(qlist_entry_obj(list_entry)); + char *opt_name; + + opt_name = g_strdup_printf("%s.%u", opts->name, i++); + subopts = qemu_opts_create(opts, opt_name, 1, &local_err); + g_free(opt_name); + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); + goto out; + } + + qemu_opts_absorb_qdict(subopts, section, &local_err); + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); + qemu_opts_del(subopts); + goto out; + } + + if (qdict_size(section)) { + error_setg(errp, "[%s] section doesn't support the option '%s'", + opts->name, qdict_first(section)->key); + qemu_opts_del(subopts); + goto out; + } + } + } + +out: + QDECREF(subqdict); + QDECREF(list); +} + +void qemu_config_parse_qdict(QDict *options, QemuOptsList **lists, + Error **errp) +{ + int i; + Error *local_err = NULL; + + for (i = 0; lists[i]; i++) { + config_parse_qdict_section(options, lists[i], &local_err); + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); + return; + } + } +} diff --git a/util/qemu-progress.c b/util/qemu-progress.c index 9a3f96c..4ee5cd0 100644 --- a/util/qemu-progress.c +++ b/util/qemu-progress.c @@ -24,7 +24,6 @@ #include "qemu-common.h" #include "qemu/osdep.h" -#include "sysemu/sysemu.h" #include <stdio.h> struct progress_state { @@ -83,12 +82,22 @@ static void progress_dummy_init(void) { #ifdef CONFIG_POSIX struct sigaction action; + sigset_t set; memset(&action, 0, sizeof(action)); sigfillset(&action.sa_mask); action.sa_handler = sigusr_print; action.sa_flags = 0; sigaction(SIGUSR1, &action, NULL); + + /* + * SIGUSR1 is SIG_IPI and gets blocked in qemu_init_main_loop(). In the + * tools that use the progress report SIGUSR1 isn't used in this meaning + * and instead should print the progress, so reenable it. + */ + sigemptyset(&set); + sigaddset(&set, SIGUSR1); + pthread_sigmask(SIG_UNBLOCK, &set, NULL); #endif state.print = progress_dummy_print; diff --git a/readline.c b/util/readline.c index abf27dd..8441be4 100644 --- a/readline.c +++ b/util/readline.c @@ -21,21 +21,19 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ -#include "monitor/readline.h" -#include "monitor/monitor.h" + +#include "qemu-common.h" +#include "qemu/readline.h" #define IS_NORM 0 #define IS_ESC 1 #define IS_CSI 2 #define IS_SS3 3 -#undef printf -#define printf do_not_use_printf - void readline_show_prompt(ReadLineState *rs) { - monitor_printf(rs->mon, "%s", rs->prompt); - monitor_flush(rs->mon); + rs->printf_func(rs->opaque, "%s", rs->prompt); + rs->flush_func(rs->opaque); rs->last_cmd_buf_index = 0; rs->last_cmd_buf_size = 0; rs->esc_state = IS_NORM; @@ -49,17 +47,17 @@ static void readline_update(ReadLineState *rs) if (rs->cmd_buf_size != rs->last_cmd_buf_size || memcmp(rs->cmd_buf, rs->last_cmd_buf, rs->cmd_buf_size) != 0) { for(i = 0; i < rs->last_cmd_buf_index; i++) { - monitor_printf(rs->mon, "\033[D"); + rs->printf_func(rs->opaque, "\033[D"); } rs->cmd_buf[rs->cmd_buf_size] = '\0'; if (rs->read_password) { len = strlen(rs->cmd_buf); for(i = 0; i < len; i++) - monitor_printf(rs->mon, "*"); + rs->printf_func(rs->opaque, "*"); } else { - monitor_printf(rs->mon, "%s", rs->cmd_buf); + rs->printf_func(rs->opaque, "%s", rs->cmd_buf); } - monitor_printf(rs->mon, "\033[K"); + rs->printf_func(rs->opaque, "\033[K"); memcpy(rs->last_cmd_buf, rs->cmd_buf, rs->cmd_buf_size); rs->last_cmd_buf_size = rs->cmd_buf_size; rs->last_cmd_buf_index = rs->cmd_buf_size; @@ -68,17 +66,17 @@ static void readline_update(ReadLineState *rs) delta = rs->cmd_buf_index - rs->last_cmd_buf_index; if (delta > 0) { for(i = 0;i < delta; i++) { - monitor_printf(rs->mon, "\033[C"); + rs->printf_func(rs->opaque, "\033[C"); } } else { delta = -delta; for(i = 0;i < delta; i++) { - monitor_printf(rs->mon, "\033[D"); + rs->printf_func(rs->opaque, "\033[D"); } } rs->last_cmd_buf_index = rs->cmd_buf_index; } - monitor_flush(rs->mon); + rs->flush_func(rs->opaque); } static void readline_insert_char(ReadLineState *rs, int ch) @@ -284,7 +282,7 @@ static void readline_completion(ReadLineState *rs) cmdline = g_malloc(rs->cmd_buf_index + 1); memcpy(cmdline, rs->cmd_buf, rs->cmd_buf_index); cmdline[rs->cmd_buf_index] = '\0'; - rs->completion_finder(rs->mon, cmdline); + rs->completion_finder(rs->opaque, cmdline); g_free(cmdline); /* no completion found */ @@ -299,7 +297,7 @@ static void readline_completion(ReadLineState *rs) if (len > 0 && rs->completions[0][len - 1] != '/') readline_insert_char(rs, ' '); } else { - monitor_printf(rs->mon, "\n"); + rs->printf_func(rs->opaque, "\n"); max_width = 0; max_prefix = 0; for(i = 0; i < rs->nb_completions; i++) { @@ -329,9 +327,9 @@ static void readline_completion(ReadLineState *rs) nb_cols = 80 / max_width; j = 0; for(i = 0; i < rs->nb_completions; i++) { - monitor_printf(rs->mon, "%-*s", max_width, rs->completions[i]); + rs->printf_func(rs->opaque, "%-*s", max_width, rs->completions[i]); if (++j == nb_cols || i == (rs->nb_completions - 1)) { - monitor_printf(rs->mon, "\n"); + rs->printf_func(rs->opaque, "\n"); j = 0; } } @@ -365,12 +363,12 @@ void readline_handle_byte(ReadLineState *rs, int ch) rs->cmd_buf[rs->cmd_buf_size] = '\0'; if (!rs->read_password) readline_hist_add(rs, rs->cmd_buf); - monitor_printf(rs->mon, "\n"); + rs->printf_func(rs->opaque, "\n"); rs->cmd_buf_index = 0; rs->cmd_buf_size = 0; rs->last_cmd_buf_index = 0; rs->last_cmd_buf_size = 0; - rs->readline_func(rs->mon, rs->cmd_buf, rs->readline_opaque); + rs->readline_func(rs->opaque, rs->cmd_buf, rs->readline_opaque); break; case 23: /* ^W */ @@ -480,13 +478,17 @@ const char *readline_get_history(ReadLineState *rs, unsigned int index) return rs->history[index]; } -ReadLineState *readline_init(Monitor *mon, +ReadLineState *readline_init(ReadLinePrintfFunc *printf_func, + ReadLineFlushFunc *flush_func, + void *opaque, ReadLineCompletionFunc *completion_finder) { ReadLineState *rs = g_malloc0(sizeof(*rs)); rs->hist_entry = -1; - rs->mon = mon; + rs->opaque = opaque; + rs->printf_func = printf_func; + rs->flush_func = flush_func; rs->completion_finder = completion_finder; return rs; |